[ARVADOS] created: 1.3.0-1462-g30e065c34

Git user git@public.curoverse.com
Thu Aug 8 14:05:07 UTC 2019


        at  30e065c34db0ab9a0e824a77b1ac0a46412598e0 (commit)


commit 30e065c34db0ab9a0e824a77b1ac0a46412598e0
Author: Peter Amstutz <pamstutz@veritasgenetics.com>
Date:   Thu Aug 8 10:04:43 2019 -0400

    15133: Delete crunch-job & arv-run-pipeline-instance
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

diff --git a/sdk/cli/bin/arv-crunch-job b/sdk/cli/bin/arv-crunch-job
deleted file mode 100755
index 6e4b5e0b1..000000000
--- a/sdk/cli/bin/arv-crunch-job
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-exec File.join(File.dirname(File.realpath(__FILE__)), 'crunch-job'), *ARGV
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
deleted file mode 100755
index 336b1a2c7..000000000
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ /dev/null
@@ -1,781 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-class WhRunPipelineInstance
-end
-
-if RUBY_VERSION < '1.9.3' then
-  abort <<-EOS
-#{$0.gsub(/^\.\//,'')} requires Ruby version 1.9.3 or higher.
-  EOS
-end
-
-begin
-  require 'arvados'
-  require 'rubygems'
-  require 'json'
-  require 'pp'
-  require 'optimist'
-  require 'google/api_client'
-rescue LoadError => l
-  $stderr.puts $:
-  abort <<-EOS
-#{$0}: fatal: #{l.message}
-Some runtime dependencies may be missing.
-Try: gem install arvados pp google-api-client json optimist
-  EOS
-end
-
-def debuglog(message, verbosity=1)
-  $stderr.puts "#{File.split($0).last} #{$$}: #{message}" if $debuglevel >= verbosity
-end
-
-# Parse command line options (the kind that control the behavior of
-# this program, that is, not the pipeline component parameters).
-
-p = Optimist::Parser.new do
-  version __FILE__
-  banner(<<EOF)
-
-Usage:
-  arv-run-pipeline-instance --template TEMPLATE_UUID [options] [--] [parameters]
-  arv-run-pipeline-instance --instance INSTANCE_UUID [options] [--] [parameters]
-
-Parameters:
-  param_name=param_value
-  param_name param_value
-                         Set (or override) the default value for every
-                         pipeline component parameter with the given
-                         name.
-
-  component_name::param_name=param_value
-  component_name::param_name param_value
-  --component_name::param_name=param_value
-  --component_name::param_name param_value
-                         Set the value of a parameter for a single
-                         pipeline component.
-
-Options:
-EOF
-  opt(:dry_run,
-      "Do not start any new jobs or wait for existing jobs to finish. Just find out whether jobs are finished, queued, or running for each component.",
-      :type => :boolean,
-      :short => :n)
-  opt(:status_text,
-      "Store plain text status in given file.",
-      :short => :none,
-      :type => :string,
-      :default => '/dev/stdout')
-  opt(:status_json,
-      "Store json-formatted pipeline in given file.",
-      :short => :none,
-      :type => :string,
-      :default => '/dev/null')
-  opt(:no_wait,
-      "Do not wait for jobs to finish. Just look up status, submit new jobs if needed, and exit.",
-      :short => :none,
-      :type => :boolean)
-  opt(:no_reuse,
-      "Do not reuse existing jobs to satisfy pipeline components. Submit a new job for every component.",
-      :short => :none,
-      :type => :boolean)
-  opt(:debug,
-      "Print extra debugging information on stderr.",
-      :type => :boolean)
-  opt(:debug_level,
-      "Set debug verbosity level.",
-      :short => :none,
-      :type => :integer)
-  opt(:template,
-      "UUID of pipeline template, or path to local pipeline template file.",
-      :short => :none,
-      :type => :string)
-  opt(:instance,
-      "UUID of pipeline instance.",
-      :short => :none,
-      :type => :string)
-  opt(:submit,
-      "Submit the pipeline instance to the server, and exit. Let the Crunch dispatch service satisfy the components by finding/running jobs.",
-      :short => :none,
-      :type => :boolean)
-  opt(:run_pipeline_here,
-      "Manage the pipeline instance in-process. Submit jobs to Crunch as needed. Do not exit until the pipeline finishes (or fails).",
-      :short => :none,
-      :type => :boolean)
-  opt(:run_jobs_here,
-      "Run jobs in the local terminal session instead of submitting them to Crunch. Implies --run-pipeline-here. Note: this results in a significantly different job execution environment, and some Crunch features are not supported. It can be necessary to modify a pipeline in order to make it run this way.",
-      :short => :none,
-      :type => :boolean)
-  opt(:run_here,
-      "Synonym for --run-jobs-here.",
-      :short => :none,
-      :type => :boolean)
-  opt(:description,
-      "Description for the pipeline instance.",
-      :short => :none,
-      :type => :string)
-  opt(:project_uuid,
-      "UUID of the project for the pipeline instance.",
-      short: :none,
-      type: :string)
-  stop_on [:'--']
-end
-$options = Optimist::with_standard_exception_handling p do
-  p.parse ARGV
-end
-$debuglevel = $options[:debug_level] || ($options[:debug] && 1) || 0
-
-$options[:run_jobs_here] ||= $options[:run_here] # old flag name
-$options[:run_pipeline_here] ||= $options[:run_jobs_here] # B requires A
-
-if $options[:instance]
-  if $options[:template] or $options[:submit]
-    abort "#{$0}: syntax error: --instance cannot be combined with --template or --submit."
-  end
-elsif not $options[:template]
-  $stderr.puts "error: you must supply a --template or --instance."
-  p.educate
-  abort
-end
-
-if $options[:run_pipeline_here] == $options[:submit]
-  abort "#{$0}: error: you must supply --run-pipeline-here, --run-jobs-here, or --submit."
-end
-
-# Set up the API client.
-
-$arv = Arvados.new api_version: 'v1'
-$client = $arv.client
-$arvados = $arv.arvados_api
-
-class PipelineInstance
-  def self.find(uuid)
-    result = $client.execute(:api_method => $arvados.pipeline_instances.get,
-                             :parameters => {
-                               :uuid => uuid
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    j = JSON.parse result.body, :symbolize_names => true
-    unless j.is_a? Hash and j[:uuid]
-      debuglog "Failed to get pipeline_instance: #{j[:errors] rescue nil}", 0
-      nil
-    else
-      debuglog "Retrieved pipeline_instance #{j[:uuid]}"
-      self.new(j)
-    end
-  end
-  def self.create(attributes)
-    result = $client.execute(:api_method => $arvados.pipeline_instances.create,
-                             :body_object => {
-                               :pipeline_instance => attributes
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    j = JSON.parse result.body, :symbolize_names => true
-    unless j.is_a? Hash and j[:uuid]
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nFailed to create pipeline_instance: #{j[:errors] rescue nil} #{j.inspect}"
-    end
-    debuglog "Created pipeline instance: #{j[:uuid]}"
-    self.new(j)
-  end
-  def save
-    result = $client.execute(:api_method => $arvados.pipeline_instances.update,
-                             :parameters => {
-                               :uuid => @pi[:uuid]
-                             },
-                             :body_object => {
-                               :pipeline_instance => @attributes_to_update
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    j = JSON.parse result.body, :symbolize_names => true
-    unless j.is_a? Hash and j[:uuid]
-      debuglog "Failed to save pipeline_instance: #{j[:errors] rescue nil}", 0
-      nil
-    else
-      @attributes_to_update = {}
-      @pi = j
-    end
-  end
-  def []=(x,y)
-    @attributes_to_update[x] = y
-    @pi[x] = y
-  end
-  def [](x)
-    @pi[x]
-  end
-
-  def log_stderr(msg)
-    $arv.log.create log: {
-      event_type: 'stderr',
-      object_uuid: self[:uuid],
-      owner_uuid: self[:owner_uuid],
-      properties: {"text" => msg},
-    }
-  end
-
-  protected
-  def initialize(j)
-    @attributes_to_update = {}
-    @pi = j
-  end
-end
-
-class JobCache
-  def self.get(uuid)
-    @cache ||= {}
-    result = $client.execute(:api_method => $arvados.jobs.get,
-                             :parameters => {
-                               :uuid => uuid
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    @cache[uuid] = JSON.parse result.body, :symbolize_names => true
-  end
-  def self.where(conditions)
-    result = $client.execute(:api_method => $arvados.jobs.list,
-                             :parameters => {
-                               :limit => 10000,
-                               :where => conditions.to_json
-                             },
-                             :authenticated => false,
-                             :headers => {
-                               authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                             })
-    list = JSON.parse result.body, :symbolize_names => true
-    if list and list[:items].is_a? Array
-      list[:items]
-    else
-      []
-    end
-  end
-
-  # create() returns [job, exception]. If both job and exception are
-  # nil, there was a non-retryable error and the call should not be
-  # attempted again.
-  def self.create(pipeline, component, job, create_params)
-    @cache ||= {}
-
-    body = {job: no_nil_values(job)}.merge(no_nil_values(create_params))
-
-    result = nil
-    begin
-      result = $client.execute(
-        :api_method => $arvados.jobs.create,
-        :body_object => body,
-        :authenticated => false,
-        :headers => {
-          authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-        })
-      if result.status == 429 || result.status >= 500
-        raise Exception.new("HTTP status #{result.status}")
-      end
-    rescue Exception => e
-      return nil, e
-    end
-    j = JSON.parse(result.body, :symbolize_names => true) rescue nil
-    if result.status == 200 && j.is_a?(Hash) && j[:uuid]
-      @cache[j[:uuid]] = j
-      return j, nil
-    else
-      errors = j[:errors] rescue []
-      debuglog "create job: [#{result.status}] #{errors.inspect} with attributes #{body}", 0
-
-      msg = ""
-      errors.each do |err|
-        msg += "Error creating job for component #{component}: #{err}\n"
-      end
-      msg += "Job submission was: #{body.to_json}"
-
-      pipeline.log_stderr(msg)
-      return nil, nil
-    end
-  end
-
-  protected
-
-  def self.no_nil_values(hash)
-    hash.reject { |key, value| value.nil? }
-  end
-end
-
-class WhRunPipelineInstance
-  attr_reader :instance
-
-  def initialize(_options)
-    @options = _options
-  end
-
-  def fetch_template(template)
-    if template.match /[^-0-9a-z]/
-      # Doesn't look like a uuid -- use it as a filename.
-      @template = JSON.parse File.read(template), :symbolize_names => true
-    else
-      result = $client.execute(:api_method => $arvados.pipeline_templates.get,
-                               :parameters => {
-                                 :uuid => template
-                               },
-                               :authenticated => false,
-                               :headers => {
-                                 authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN']
-                               })
-      @template = JSON.parse result.body, :symbolize_names => true
-      if !@template[:uuid]
-        abort "#{$0}: fatal: failed to retrieve pipeline template #{template} #{@template[:errors].inspect rescue nil}"
-      end
-    end
-    self
-  end
-
-  def fetch_instance(instance_uuid)
-    @instance = PipelineInstance.find(instance_uuid)
-    @template = @instance
-    self
-  end
-
-  def apply_parameters(params_args)
-    params_args.shift if params_args[0] == '--'
-    params = {}
-    while !params_args.empty?
-      if (re = params_args[0].match /^(--)?([^-].*?)=(.+)/)
-        params[re[2]] = re[3]
-        params_args.shift
-      elsif params_args.size > 1
-        param = params_args.shift.sub /^--/, ''
-        params[param] = params_args.shift
-      else
-        abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: I do not know what to do with arg \"#{params_args[0]}\""
-      end
-    end
-
-    if not @template[:components].is_a?(Hash)
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: Template missing \"components\" hash"
-    end
-    @components = @template[:components].dup
-
-    bad_components = @components.each_pair.select do |cname, cspec|
-      not cspec.is_a?(Hash)
-    end
-    if bad_components.any?
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: Components not specified with hashes: #{bad_components.map(&:first).join(', ')}"
-    end
-
-    bad_components = @components.each_pair.select do |cname, cspec|
-      not cspec[:script_parameters].is_a?(Hash)
-    end
-    if bad_components.any?
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nSyntax error: Components missing \"script_parameters\" hashes: #{bad_components.map(&:first).join(', ')}"
-    end
-
-    errors = []
-    @components.each do |componentname, component|
-      component[:script_parameters].each do |parametername, parameter|
-        parameter = { :value => parameter } unless parameter.is_a? Hash
-        if params.has_key?("#{componentname}::#{parametername}")
-          value = params["#{componentname}::#{parametername}"]
-        elsif parameter.has_key?(:value)
-          value = parameter[:value]
-        elsif parameter.has_key?(:output_of)
-          if !@components[parameter[:output_of].intern]
-            errors << [componentname, parametername, "output_of refers to nonexistent component '#{parameter[:output_of]}'"]
-          else
-            # value will be filled in later when the upstream
-            # component's output becomes known
-          end
-          next
-        elsif params.has_key?(parametername.to_s)
-          value = params[parametername.to_s]
-        elsif parameter.has_key?(:default)
-          value = parameter[:default]
-        elsif [false, 'false', 0, '0'].index(parameter[:required])
-          value = nil
-        else
-          errors << [componentname, parametername, "required parameter is missing"]
-          next
-        end
-        debuglog "parameter #{componentname}::#{parametername} == #{value}"
-
-        component[:script_parameters][parametername] =
-          parameter.dup.merge(value: value)
-      end
-    end
-    if !errors.empty?
-      all_errors = errors.collect do |c,p,e|
-        "#{c}::#{p} - #{e}\n"
-      end.join("")
-      abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nErrors:\n#{all_errors}"
-    end
-    debuglog "options=" + @options.pretty_inspect
-    self
-  end
-
-  def setup_instance
-    if @instance
-      @instance[:properties][:run_options] ||= {}
-      if @options[:no_reuse]
-        # override properties of existing instance
-        @instance[:properties][:run_options][:enable_job_reuse] = false
-      else
-        # Default to "enable reuse" if not specified. (This code path
-        # can go away when old clients go away.)
-        if @instance[:properties][:run_options][:enable_job_reuse].nil?
-          @instance[:properties][:run_options][:enable_job_reuse] = true
-        end
-      end
-    else
-      description = $options[:description] ||
-                    ("Created at #{Time.now.localtime}" + (@template[:name].andand.size.andand>0 ? " using the pipeline template *#{@template[:name]}*" : ""))
-      instance_body = {
-        components: @components,
-        properties: {
-          run_options: {
-            enable_job_reuse: !@options[:no_reuse]
-          }
-        },
-        pipeline_template_uuid: @template[:uuid],
-        description: description,
-        state: ($options[:submit] ? 'RunningOnServer' : 'RunningOnClient')
-      }
-      if @options[:project_uuid]
-        instance_body[:owner_uuid] = @options[:project_uuid]
-      end
-      @instance = PipelineInstance.create(instance_body)
-    end
-    self
-  end
-
-  def run
-    moretodo = true
-    interrupted = false
-
-    if @instance[:started_at].nil?
-      @instance[:started_at] = Time.now
-    end
-
-    job_creation_failed = 0
-    while moretodo
-      moretodo = false
-      @components.each do |cname, c|
-        job = nil
-        owner_uuid = @instance[:owner_uuid]
-        # Is the job satisfying this component already known to be
-        # finished? (Already meaning "before we query API server about
-        # the job's current state")
-        c_already_finished = (c[:job] &&
-                              c[:job][:uuid] &&
-                              ["Complete", "Failed", "Cancelled"].include?(c[:job][:state]))
-        if !c[:job] and
-            c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty?
-          # No job yet associated with this component and its component inputs
-          # are fully specified (any output_of script_parameters are resolved
-          # to real values)
-          my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}"
-          job, err = JobCache.create(@instance, cname, {
-            :script => c[:script],
-            :script_parameters => Hash[c[:script_parameters].map do |key, spec|
-                                         [key, spec[:value]]
-                                       end],
-            :script_version => c[:script_version],
-            :repository => c[:repository],
-            :nondeterministic => c[:nondeterministic],
-            :runtime_constraints => c[:runtime_constraints],
-            :owner_uuid => owner_uuid,
-            :is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil),
-            :submit_id => my_submit_id,
-            :state => (if @options[:run_jobs_here] then "Running" else "Queued" end)
-          }, {
-            # This is the right place to put these attributes when
-            # dealing with new API servers.
-            :minimum_script_version => c[:minimum_script_version],
-            :exclude_script_versions => c[:exclude_minimum_script_versions],
-            :find_or_create => (@instance[:properties][:run_options].andand[:enable_job_reuse] &&
-                                !c[:nondeterministic]),
-            :filters => c[:filters]
-          })
-          if job
-            debuglog "component #{cname} new job #{job[:uuid]}"
-            c[:job] = job
-            c[:run_in_process] = (@options[:run_jobs_here] and
-                                  job[:submit_id] == my_submit_id)
-          elsif err.nil?
-            debuglog "component #{cname} new job failed", 0
-            job_creation_failed += 1
-          else
-            debuglog "component #{cname} new job failed, err=#{err}", 0
-          end
-        end
-
-        if c[:job] and c[:run_in_process] and not ["Complete", "Failed", "Cancelled"].include? c[:job][:state]
-          report_status
-          begin
-            require 'open3'
-            Open3.popen3("arv-crunch-job", "--force-unlock",
-                         "--job", c[:job][:uuid]) do |stdin, stdout, stderr, wait_thr|
-              debuglog "arv-crunch-job pid #{wait_thr.pid} started", 0
-              stdin.close
-              while true
-                rready, wready, = IO.select([stdout, stderr], [])
-                break if !rready[0]
-                begin
-                  buf = rready[0].read_nonblock(2**20)
-                rescue EOFError
-                  break
-                end
-                (rready[0] == stdout ? $stdout : $stderr).write(buf)
-              end
-              stdout.close
-              stderr.close
-              debuglog "arv-crunch-job pid #{wait_thr.pid} exit #{wait_thr.value.to_i}", 0
-            end
-            if not $arv.job.get(uuid: c[:job][:uuid])[:finished_at]
-              raise Exception.new("arv-crunch-job did not set finished_at.")
-            end
-          rescue Exception => e
-            debuglog "Interrupted (#{e}). Failing job.", 0
-            $arv.job.update(uuid: c[:job][:uuid],
-                            job: {
-                              state: "Failed"
-                            })
-          end
-        end
-
-        if c[:job] and c[:job][:uuid]
-          if ["Running", "Queued"].include?(c[:job][:state])
-            # Job is running (or may be soon) so update copy of job record
-            c[:job] = JobCache.get(c[:job][:uuid])
-          end
-
-          if c[:job][:state] == "Complete"
-            # Populate script_parameters of other components waiting for
-            # this job
-            @components.each do |c2name, c2|
-              c2[:script_parameters].each do |pname, p|
-                if p.is_a? Hash and p[:output_of] == cname.to_s
-                  debuglog "parameter #{c2name}::#{pname} == #{c[:job][:output]}"
-                  c2[:script_parameters][pname] = {value: c[:job][:output]}
-                  moretodo = true
-                end
-              end
-            end
-            unless c_already_finished
-              # This is my first time discovering that the job
-              # succeeded. (At the top of this loop, I was still
-              # waiting for it to finish.)
-
-              if @instance[:name].andand.length.andand > 0
-                pipeline_name = @instance[:name]
-              elsif @template.andand[:name].andand.length.andand > 0
-                pipeline_name = @template[:name]
-              else
-                pipeline_name = @instance[:uuid]
-              end
-              if c[:output_name] != false
-                # Create a collection located in the same project as the pipeline with the contents of the output.
-                portable_data_hash = c[:job][:output]
-                collections = $arv.collection.list(limit: 1,
-                                                   filters: [['portable_data_hash', '=', portable_data_hash]],
-                                                   select: ["portable_data_hash", "manifest_text"]
-                                                   )[:items]
-                if collections.any?
-                  name = c[:output_name] || "Output #{portable_data_hash[0..7]} of #{cname} of #{pipeline_name}"
-
-                  # check if there is a name collision.
-                  name_collisions = $arv.collection.list(filters: [["owner_uuid", "=", owner_uuid],
-                                                                   ["name", "=", name]])[:items]
-
-                  newcollection_actual = nil
-                  if name_collisions.any? and name_collisions.first[:portable_data_hash] == portable_data_hash
-                    # There is already a collection with the same name and the
-                    # same contents, so just point to that.
-                    newcollection_actual = name_collisions.first
-                  end
-
-                  if newcollection_actual.nil?
-                    # Did not find a collection with the same name (or the
-                    # collection has a different portable data hash) so create
-                    # a new collection with ensure_unique_name: true.
-                    newcollection = {
-                      owner_uuid: owner_uuid,
-                      name: name,
-                      portable_data_hash: collections.first[:portable_data_hash],
-                      manifest_text: collections.first[:manifest_text]
-                    }
-                    debuglog "Creating collection #{newcollection}", 0
-                    newcollection_actual = $arv.collection.create collection: newcollection, ensure_unique_name: true
-                  end
-
-                  c[:output_uuid] = newcollection_actual[:uuid]
-                else
-                  debuglog "Could not find a collection with portable data hash #{portable_data_hash}", 0
-                end
-              end
-            end
-          elsif ["Queued", "Running"].include? c[:job][:state]
-            # Job is running or queued to run, so indicate that pipeline
-            # should continue to run
-            moretodo = true
-          elsif c[:job][:state] == "Cancelled"
-            debuglog "component #{cname} job #{c[:job][:uuid]} cancelled."
-            moretodo = false
-          elsif c[:job][:state] == "Failed"
-            moretodo = false
-          end
-        end
-      end
-      @instance[:components] = @components
-      report_status
-
-      if @options[:no_wait]
-        moretodo = false
-      end
-
-      # If job creation fails, just give up on this pipeline instance.
-      if job_creation_failed > 0
-        moretodo = false
-      end
-
-      if moretodo
-        begin
-          sleep 10
-        rescue Interrupt
-          debuglog "interrupt", 0
-          interrupted = true
-          break
-        end
-      end
-    end
-
-    c_in_state = @components.values.group_by { |c|
-      c[:job] and c[:job][:state]
-    }
-    succeeded = c_in_state["Complete"].andand.count || 0
-    failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0)
-    ended = succeeded + failed
-
-    success = (succeeded == @components.length)
-
-    # A job create call failed. Just give up.
-    if job_creation_failed > 0
-      debuglog "job creation failed - giving up on this pipeline instance", 0
-      success = false
-      failed += 1
-    end
-
-    if interrupted
-     if success
-        @instance[:state] = 'Complete'
-     else
-        @instance[:state] = 'Paused'
-     end
-    else
-      if ended == @components.length or failed > 0
-        @instance[:state] = success ? 'Complete' : 'Failed'
-      end
-    end
-
-    if @instance[:finished_at].nil? and ['Complete', 'Failed'].include? @instance[:state]
-      @instance[:finished_at] = Time.now
-    end
-
-    debuglog "pipeline instance state is #{@instance[:state]}"
-
-    # set components_summary
-    components_summary = {"todo" => @components.length - ended, "done" => succeeded, "failed" => failed}
-    @instance[:components_summary] = components_summary
-
-    @instance.save
-  end
-
-  def cleanup
-    if @instance and @instance[:state] == 'RunningOnClient'
-      @instance[:state] = 'Paused'
-      @instance.save
-    end
-  end
-
-  def uuid
-    @instance[:uuid]
-  end
-
-  protected
-
-  def report_status
-    @instance.save
-
-    if @options[:status_json] != '/dev/null'
-      File.open(@options[:status_json], 'w') do |f|
-        f.puts @components.pretty_inspect
-      end
-    end
-
-    if @options[:status_text] != '/dev/null'
-      File.open(@options[:status_text], 'w') do |f|
-        f.puts ""
-        f.puts "#{Time.now} -- pipeline_instance #{@instance[:uuid]}"
-        namewidth = @components.collect { |cname, c| cname.size }.max
-        @components.each do |cname, c|
-          jstatus = if !c[:job]
-                      "-"
-                    else case c[:job][:state]
-                         when "Running"
-                           "#{c[:job][:tasks_summary].inspect}"
-                         when "Complete"
-                           c[:job][:output]
-                         when "Cancelled"
-                           "cancelled #{c[:job][:cancelled_at]}"
-                         when "Failed"
-                           "failed #{c[:job][:finished_at]}"
-                         when "Queued"
-                           "queued #{c[:job][:created_at]}"
-                         end
-                    end
-          f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}"
-        end
-      end
-    end
-  end
-
-  def abort(msg)
-    if @instance
-      if ["New", "Ready", "RunningOnClient",
-          "RunningOnServer"].include?(@instance[:state])
-        @instance[:state] = "Failed"
-        @instance[:finished_at] = Time.now
-        @instance.save
-      end
-      @instance.log_stderr(msg)
-    end
-    Kernel::abort(msg)
-  end
-end
-
-runner = WhRunPipelineInstance.new($options)
-begin
-  if $options[:template]
-    runner.fetch_template($options[:template])
-  else
-    runner.fetch_instance($options[:instance])
-  end
-  runner.apply_parameters(p.leftovers)
-  runner.setup_instance
-  if $options[:submit]
-    runner.instance.save
-    puts runner.instance[:uuid]
-  else
-    runner.run
-  end
-rescue Exception => e
-  runner.cleanup
-  raise e
-end
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
deleted file mode 100755
index 242dff708..000000000
--- a/sdk/cli/bin/crunch-job
+++ /dev/null
@@ -1,2577 +0,0 @@
-#!/usr/bin/env perl
-# -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*-
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-=head1 NAME
-
-crunch-job: Execute job steps, save snapshots as requested, collate output.
-
-=head1 SYNOPSIS
-
-Obtain job details from Arvados, run tasks on compute nodes (typically
-invoked by scheduler on controller):
-
- crunch-job --job x-y-z --git-dir /path/to/repo/.git
-
-Obtain job details from command line, run tasks on local machine
-(typically invoked by application or developer on VM):
-
- crunch-job --job '{"script_version":"/path/to/working/tree","script":"scriptname",...}'
-
- crunch-job --job '{"repository":"https://github.com/curoverse/arvados.git","script_version":"master","script":"scriptname",...}'
-
-=head1 OPTIONS
-
-=over
-
-=item --force-unlock
-
-If the job is already locked, steal the lock and run it anyway.
-
-=item --git-dir
-
-Path to a .git directory (or a git URL) where the commit given in the
-job's C<script_version> attribute is to be found. If this is I<not>
-given, the job's C<repository> attribute will be used.
-
-=item --job-api-token
-
-Arvados API authorization token to use during the course of the job.
-
-=item --no-clear-tmp
-
-Do not clear per-job/task temporary directories during initial job
-setup. This can speed up development and debugging when running jobs
-locally.
-
-=item --job
-
-UUID of the job to run, or a JSON-encoded job resource without a
-UUID. If the latter is given, a new job object will be created.
-
-=back
-
-=head1 RUNNING JOBS LOCALLY
-
-crunch-job's log messages appear on stderr along with the job tasks'
-stderr streams. The log is saved in Keep at each checkpoint and when
-the job finishes.
-
-If the job succeeds, the job's output locator is printed on stdout.
-
-While the job is running, the following signals are accepted:
-
-=over
-
-=item control-C, SIGINT, SIGQUIT
-
-Save a checkpoint, terminate any job tasks that are running, and stop.
-
-=item SIGALRM
-
-Save a checkpoint and continue.
-
-=item SIGHUP
-
-Refresh node allocation (i.e., check whether any nodes have been added
-or unallocated) and attributes of the Job record that should affect
-behavior (e.g., cancel job if cancelled_at becomes non-nil).
-
-=back
-
-=cut
-
-
-use strict;
-use POSIX ':sys_wait_h';
-use POSIX qw(strftime);
-use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
-use Arvados;
-use Cwd qw(realpath);
-use Data::Dumper;
-use Digest::MD5 qw(md5_hex);
-use Getopt::Long;
-use IPC::Open2;
-use IO::Select;
-use File::Temp;
-use Fcntl ':flock';
-use File::Path qw( make_path remove_tree );
-
-use constant TASK_TEMPFAIL => 111;
-use constant EX_TEMPFAIL => 75;
-use constant EX_RETRY_UNLOCKED => 93;
-
-$ENV{"TMPDIR"} ||= "/tmp";
-unless (defined $ENV{"CRUNCH_TMP"}) {
-  $ENV{"CRUNCH_TMP"} = $ENV{"TMPDIR"} . "/crunch-job";
-  if ($ENV{"USER"} ne "crunch" && $< != 0) {
-    # use a tmp dir unique for my uid
-    $ENV{"CRUNCH_TMP"} .= "-$<";
-  }
-}
-
-# Create the tmp directory if it does not exist
-if ( ! -d $ENV{"CRUNCH_TMP"} ) {
-  make_path $ENV{"CRUNCH_TMP"} or die "Failed to create temporary working directory: " . $ENV{"CRUNCH_TMP"};
-}
-
-$ENV{"JOB_WORK"} = $ENV{"CRUNCH_TMP"} . "/work";
-$ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
-$ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
-mkdir ($ENV{"JOB_WORK"});
-
-my %proc;
-my $force_unlock;
-my $git_dir;
-my $jobspec;
-my $job_api_token;
-my $no_clear_tmp;
-my $resume_stash;
-my $cgroup_root = "/sys/fs/cgroup";
-my $docker_bin = "docker.io";
-my $docker_run_args = "";
-my $srun_sync_timeout = 15*60;
-GetOptions('force-unlock' => \$force_unlock,
-           'git-dir=s' => \$git_dir,
-           'job=s' => \$jobspec,
-           'job-api-token=s' => \$job_api_token,
-           'no-clear-tmp' => \$no_clear_tmp,
-           'resume-stash=s' => \$resume_stash,
-           'cgroup-root=s' => \$cgroup_root,
-           'docker-bin=s' => \$docker_bin,
-           'docker-run-args=s' => \$docker_run_args,
-           'srun-sync-timeout=i' => \$srun_sync_timeout,
-    );
-
-if (defined $job_api_token) {
-  $ENV{ARVADOS_API_TOKEN} = $job_api_token;
-}
-
-my $have_slurm = exists $ENV{SLURM_JOB_ID} && exists $ENV{SLURM_NODELIST};
-
-
-$SIG{'USR1'} = sub
-{
-  $main::ENV{CRUNCH_DEBUG} = 1;
-};
-$SIG{'USR2'} = sub
-{
-  $main::ENV{CRUNCH_DEBUG} = 0;
-};
-
-my $arv = Arvados->new('apiVersion' => 'v1');
-
-my $Job;
-my $job_id;
-my $dbh;
-my $sth;
-my @jobstep;
-
-my $local_job;
-if ($jobspec =~ /^[-a-z\d]+$/)
-{
-  # $jobspec is an Arvados UUID, not a JSON job specification
-  $Job = api_call("jobs/get", uuid => $jobspec);
-  $local_job = 0;
-}
-else
-{
-  $local_job = JSON::decode_json($jobspec);
-}
-
-
-# Make sure our workers (our slurm nodes, localhost, or whatever) are
-# at least able to run basic commands: they aren't down or severely
-# misconfigured.
-my $cmd = ['true'];
-if (($Job || $local_job)->{docker_image_locator}) {
-  $cmd = [$docker_bin, 'ps', '-q'];
-}
-Log(undef, "Sanity check is `@$cmd`");
-my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-  ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
-  $cmd,
-  {label => "sanity check"});
-if ($exited != 0) {
-  Log(undef, "Sanity check failed: ".exit_status_s($exited));
-  exit EX_TEMPFAIL;
-}
-Log(undef, "Sanity check OK");
-
-
-my $User = api_call("users/current");
-
-if (!$local_job) {
-  if (!$force_unlock) {
-    # Claim this job, and make sure nobody else does
-    eval { api_call("jobs/lock", uuid => $Job->{uuid}); };
-    if ($@) {
-      Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
-      exit EX_TEMPFAIL;
-    };
-  }
-}
-else
-{
-  if (!$resume_stash)
-  {
-    map { croak ("No $_ specified") unless $local_job->{$_} }
-    qw(script script_version script_parameters);
-  }
-
-  $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
-  $local_job->{'started_at'} = gmtime;
-  $local_job->{'state'} = 'Running';
-
-  $Job = api_call("jobs/create", job => $local_job);
-}
-$job_id = $Job->{'uuid'};
-
-my $keep_logfile = $job_id . '.log.txt';
-log_writer_start($keep_logfile);
-
-$Job->{'runtime_constraints'} ||= {};
-$Job->{'runtime_constraints'}->{'max_tasks_per_node'} ||= 0;
-my $max_ncpus = $Job->{'runtime_constraints'}->{'max_tasks_per_node'};
-
-my $gem_versions = `gem list --quiet arvados-cli 2>/dev/null`;
-if ($? == 0) {
-  $gem_versions =~ s/^arvados-cli \(/ with arvados-cli Gem version(s) /;
-  chomp($gem_versions);
-  chop($gem_versions);  # Closing parentheses
-} else {
-  $gem_versions = "";
-}
-Log(undef,
-    "running from " . ((-e $0) ? realpath($0) : "stdin") . $gem_versions);
-
-Log (undef, "check slurm allocation");
-my @slot;
-my @node;
-# Should use $ENV{SLURM_TASKS_PER_NODE} instead of sinfo? (eg. "4(x3),2,4(x2)")
-my @sinfo;
-if (!$have_slurm)
-{
-  my $localcpus = 0 + `grep -cw ^processor /proc/cpuinfo` || 1;
-  push @sinfo, "$localcpus localhost";
-}
-if (exists $ENV{SLURM_NODELIST})
-{
-  push @sinfo, `sinfo -h --format='%c %N' --nodes=\Q$ENV{SLURM_NODELIST}\E`;
-}
-foreach (@sinfo)
-{
-  my ($ncpus, $slurm_nodelist) = split;
-  $ncpus = $max_ncpus if $max_ncpus && $ncpus > $max_ncpus;
-
-  my @nodelist;
-  while ($slurm_nodelist =~ s/^([^\[,]+?(\[.*?\])?)(,|$)//)
-  {
-    my $nodelist = $1;
-    if ($nodelist =~ /\[((\d+)(-(\d+))?(,(\d+)(-(\d+))?)*)\]/)
-    {
-      my $ranges = $1;
-      foreach (split (",", $ranges))
-      {
-	my ($a, $b);
-	if (/(\d+)-(\d+)/)
-	{
-	  $a = $1;
-	  $b = $2;
-	}
-	else
-	{
-	  $a = $_;
-	  $b = $_;
-	}
-	push @nodelist, map {
-	  my $n = $nodelist;
-	  $n =~ s/\[[-,\d]+\]/$_/;
-	  $n;
-	} ($a..$b);
-      }
-    }
-    else
-    {
-      push @nodelist, $nodelist;
-    }
-  }
-  foreach my $nodename (@nodelist)
-  {
-    Log (undef, "node $nodename - $ncpus slots");
-    my $node = { name => $nodename,
-                 ncpus => $ncpus,
-                 # The number of consecutive times a task has been dispatched
-                 # to this node and failed.
-                 losing_streak => 0,
-                 # The number of consecutive times that SLURM has reported
-                 # a node failure since the last successful task.
-                 fail_count => 0,
-                 # Don't dispatch work to this node until this time
-                 # (in seconds since the epoch) has passed.
-                 hold_until => 0 };
-    foreach my $cpu (1..$ncpus)
-    {
-      push @slot, { node => $node,
-		    cpu => $cpu };
-    }
-  }
-  push @node, @nodelist;
-}
-
-
-
-# Ensure that we get one jobstep running on each allocated node before
-# we start overloading nodes with concurrent steps
-
-@slot = sort { $a->{cpu} <=> $b->{cpu} } @slot;
-
-
-$Job->update_attributes(
-  'tasks_summary' => { 'failed' => 0,
-                       'todo' => 1,
-                       'running' => 0,
-                       'done' => 0 });
-
-Log (undef, "start");
-$SIG{'INT'} = sub { $main::please_freeze = 1; };
-$SIG{'QUIT'} = sub { $main::please_freeze = 1; };
-$SIG{'TERM'} = \&croak;
-$SIG{'TSTP'} = sub { $main::please_freeze = 1; };
-$SIG{'ALRM'} = sub { $main::please_info = 1; };
-$SIG{'CONT'} = sub { $main::please_continue = 1; };
-$SIG{'HUP'} = sub { $main::please_refresh = 1; };
-
-$main::please_freeze = 0;
-$main::please_info = 0;
-$main::please_continue = 0;
-$main::please_refresh = 0;
-my $jobsteps_must_output_keys = 0;	# becomes 1 when any task outputs a key
-
-grep { $ENV{$1} = $2 if /^(NOCACHE.*?)=(.*)/ } split ("\n", $$Job{knobs});
-$ENV{"CRUNCH_JOB_UUID"} = $job_id;
-$ENV{"JOB_UUID"} = $job_id;
-
-
-my @jobstep_todo = ();
-my @jobstep_done = ();
-my @jobstep_tomerge = ();
-my $jobstep_tomerge_level = 0;
-my $squeue_checked = 0;
-my $sinfo_checked = 0;
-my $latest_refresh = scalar time;
-
-
-
-if (defined $Job->{thawedfromkey})
-{
-  thaw ($Job->{thawedfromkey});
-}
-else
-{
-  my $first_task = api_call("job_tasks/create", job_task => {
-    'job_uuid' => $Job->{'uuid'},
-    'sequence' => 0,
-    'qsequence' => 0,
-    'parameters' => {},
-  });
-  push @jobstep, { 'level' => 0,
-		   'failures' => 0,
-                   'arvados_task' => $first_task,
-		 };
-  push @jobstep_todo, 0;
-}
-
-
-if (!$have_slurm)
-{
-  must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
-}
-
-my $build_script = handle_readall(\*DATA);
-my $nodelist = join(",", @node);
-my $git_tar_count = 0;
-
-if (!defined $no_clear_tmp) {
-  # Find FUSE mounts under $CRUNCH_TMP and unmount them.  Then clean
-  # up work directories crunch_tmp/work, crunch_tmp/opt,
-  # crunch_tmp/src*.
-  my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-    ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-    ['bash', '-ec', q{
-arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
-rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRUNCH_TMP}/*.cid
-    }],
-    {label => "clean work dirs"});
-  if ($exited != 0) {
-    exit_retry_unlocked();
-  }
-}
-
-# If this job requires a Docker image, install that.
-my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem, $dockeruserarg);
-if ($docker_locator = $Job->{docker_image_locator}) {
-  Log (undef, "Install docker image $docker_locator");
-  ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
-  if (!$docker_hash)
-  {
-    croak("No Docker image hash found from locator $docker_locator");
-  }
-  Log (undef, "docker image hash is $docker_hash");
-  $docker_stream =~ s/^\.//;
-  my $docker_install_script = qq{
-loaded() {
-  id=\$($docker_bin inspect --format="{{.ID}}" \Q$docker_hash\E) || return 1
-  echo "image ID is \$id"
-  [[ \${id} = \Q$docker_hash\E ]]
-}
-if loaded >&2 2>/dev/null; then
-  echo >&2 "image is already present"
-  exit 0
-fi
-echo >&2 "docker image is not present; loading"
-arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
-if ! loaded >&2; then
-  echo >&2 "`docker load` exited 0, but image is not found (!)"
-  exit 1
-fi
-echo >&2 "image loaded successfully"
-};
-
-  my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-    ["srun", "--nodelist=" . join(',', @node)],
-    ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
-    {label => "load docker image"});
-  if ($exited != 0)
-  {
-    exit_retry_unlocked();
-  }
-
-  # Determine whether this version of Docker supports memory+swap limits.
-  ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-    ["srun", "--nodes=1"],
-    [$docker_bin, 'run', '--help'],
-    {label => "check --memory-swap feature"});
-  if ($tempfail) {
-    exit_retry_unlocked();
-  }
-  $docker_limitmem = ($stdout =~ /--memory-swap/);
-
-  # Find a non-root Docker user to use.
-  # Tries the default user for the container, then 'crunch', then 'nobody',
-  # testing for whether the actual user id is non-zero.  This defends against
-  # mistakes but not malice, but we intend to harden the security in the future
-  # so we don't want anyone getting used to their jobs running as root in their
-  # Docker containers.
-  my @tryusers = ("", "crunch", "nobody");
-  foreach my $try_user (@tryusers) {
-    my $label;
-    my $try_user_arg;
-    if ($try_user eq "") {
-      $label = "check whether default user is UID 0";
-      $try_user_arg = "";
-    } else {
-      $label = "check whether user '$try_user' is UID 0";
-      $try_user_arg = "--user=$try_user";
-    }
-    my ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-      ["srun", "--nodes=1"],
-      ["/bin/sh", "-ec",
-       "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
-      {label => $label});
-    chomp($stdout);
-    if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
-      $dockeruserarg = $try_user_arg;
-      if ($try_user eq "") {
-        Log(undef, "Container will run with default user");
-      } else {
-        Log(undef, "Container will run with $dockeruserarg");
-      }
-      last;
-    } elsif ($tempfail) {
-      exit_retry_unlocked();
-    }
-  }
-
-  if (!defined $dockeruserarg) {
-    croak("Could not find a user in container that is not UID 0 (tried default user, @tryusers) or there was a problem running 'id' in the container.");
-  }
-
-  if ($Job->{arvados_sdk_version}) {
-    # The job also specifies an Arvados SDK version.  Add the SDKs to the
-    # tar file for the build script to install.
-    Log(undef, sprintf("Packing Arvados SDK version %s for installation",
-                       $Job->{arvados_sdk_version}));
-    add_git_archive("git", "--git-dir=$git_dir", "archive",
-                    "--prefix=.arvados.sdk/",
-                    $Job->{arvados_sdk_version}, "sdk");
-  }
-}
-
-if (!defined $git_dir && $Job->{'script_version'} =~ m{^/}) {
-  # If script_version looks like an absolute path, *and* the --git-dir
-  # argument was not given -- which implies we were not invoked by
-  # crunch-dispatch -- we will use the given path as a working
-  # directory instead of resolving script_version to a git commit (or
-  # doing anything else with git).
-  $ENV{"CRUNCH_SRC_COMMIT"} = $Job->{'script_version'};
-  $ENV{"CRUNCH_SRC"} = $Job->{'script_version'};
-}
-else {
-  # Resolve the given script_version to a git commit sha1. Also, if
-  # the repository is remote, clone it into our local filesystem: this
-  # ensures "git archive" will work, and is necessary to reliably
-  # resolve a symbolic script_version like "master^".
-  $ENV{"CRUNCH_SRC"} = "$ENV{CRUNCH_TMP}/src";
-
-  Log (undef, "Looking for version ".$Job->{script_version}." from repository ".$Job->{repository});
-
-  $ENV{"CRUNCH_SRC_COMMIT"} = $Job->{script_version};
-
-  # If we're running under crunch-dispatch, it will have already
-  # pulled the appropriate source tree into its own repository, and
-  # given us that repo's path as $git_dir.
-  #
-  # If we're running a "local" job, we might have to fetch content
-  # from a remote repository.
-  #
-  # (Currently crunch-dispatch gives a local path with --git-dir, but
-  # we might as well accept URLs there too in case it changes its
-  # mind.)
-  my $repo = $git_dir || $Job->{'repository'};
-
-  # Repository can be remote or local. If remote, we'll need to fetch it
-  # to a local dir before doing `git log` et al.
-  my $repo_location;
-
-  if ($repo =~ m{://|^[^/]*:}) {
-    # $repo is a git url we can clone, like git:// or https:// or
-    # file:/// or [user@]host:repo.git. Note "user/name@host:foo" is
-    # not recognized here because distinguishing that from a local
-    # path is too fragile. If you really need something strange here,
-    # use the ssh:// form.
-    $repo_location = 'remote';
-  } elsif ($repo =~ m{^\.*/}) {
-    # $repo is a local path to a git index. We'll also resolve ../foo
-    # to ../foo/.git if the latter is a directory. To help
-    # disambiguate local paths from named hosted repositories, this
-    # form must be given as ./ or ../ if it's a relative path.
-    if (-d "$repo/.git") {
-      $repo = "$repo/.git";
-    }
-    $repo_location = 'local';
-  } else {
-    # $repo is none of the above. It must be the name of a hosted
-    # repository.
-    my $arv_repo_list = api_call("repositories/list",
-                                 'filters' => [['name','=',$repo]]);
-    my @repos_found = @{$arv_repo_list->{'items'}};
-    my $n_found = $arv_repo_list->{'serverResponse'}->{'items_available'};
-    if ($n_found > 0) {
-      Log(undef, "Repository '$repo' -> "
-          . join(", ", map { $_->{'uuid'} } @repos_found));
-    }
-    if ($n_found != 1) {
-      croak("Error: Found $n_found repositories with name '$repo'.");
-    }
-    $repo = $repos_found[0]->{'fetch_url'};
-    $repo_location = 'remote';
-  }
-  Log(undef, "Using $repo_location repository '$repo'");
-  $ENV{"CRUNCH_SRC_URL"} = $repo;
-
-  # Resolve given script_version (we'll call that $treeish here) to a
-  # commit sha1 ($commit).
-  my $treeish = $Job->{'script_version'};
-  my $commit;
-  if ($repo_location eq 'remote') {
-    # We minimize excess object-fetching by re-using the same bare
-    # repository in CRUNCH_TMP/.git for multiple crunch-jobs -- we
-    # just keep adding remotes to it as needed.
-    my $local_repo = $ENV{'CRUNCH_TMP'}."/.git";
-    my $gitcmd = "git --git-dir=\Q$local_repo\E";
-
-    # Set up our local repo for caching remote objects, making
-    # archives, etc.
-    if (!-d $local_repo) {
-      make_path($local_repo) or croak("Error: could not create $local_repo");
-    }
-    # This works (exits 0 and doesn't delete fetched objects) even
-    # if $local_repo is already initialized:
-    `$gitcmd init --bare`;
-    if ($?) {
-      croak("Error: $gitcmd init --bare exited ".exit_status_s($?));
-    }
-
-    # If $treeish looks like a hash (or abbrev hash) we look it up in
-    # our local cache first, since that's cheaper. (We don't want to
-    # do that with tags/branches though -- those change over time, so
-    # they should always be resolved by the remote repo.)
-    if ($treeish =~ /^[0-9a-f]{7,40}$/s) {
-      # Hide stderr because it's normal for this to fail:
-      my $sha1 = `$gitcmd rev-list -n1 ''\Q$treeish\E 2>/dev/null`;
-      if ($? == 0 &&
-          # Careful not to resolve a branch named abcdeff to commit 1234567:
-          $sha1 =~ /^$treeish/ &&
-          $sha1 =~ /^([0-9a-f]{40})$/s) {
-        $commit = $1;
-        Log(undef, "Commit $commit already present in $local_repo");
-      }
-    }
-
-    if (!defined $commit) {
-      # If $treeish isn't just a hash or abbrev hash, or isn't here
-      # yet, we need to fetch the remote to resolve it correctly.
-
-      # First, remove all local heads. This prevents a name that does
-      # not exist on the remote from resolving to (or colliding with)
-      # a previously fetched branch or tag (possibly from a different
-      # remote).
-      remove_tree("$local_repo/refs/heads", {keep_root => 1});
-
-      Log(undef, "Fetching objects from $repo to $local_repo");
-      `$gitcmd fetch --no-progress --tags ''\Q$repo\E \Q+refs/heads/*:refs/heads/*\E`;
-      if ($?) {
-        croak("Error: `$gitcmd fetch` exited ".exit_status_s($?));
-      }
-    }
-
-    # Now that the data is all here, we will use our local repo for
-    # the rest of our git activities.
-    $repo = $local_repo;
-  }
-
-  my $gitcmd = "git --git-dir=\Q$repo\E";
-  my $sha1 = `$gitcmd rev-list -n1 ''\Q$treeish\E`;
-  unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
-    croak("`$gitcmd rev-list` exited "
-          .exit_status_s($?)
-          .", '$treeish' not found, giving up");
-  }
-  $commit = $1;
-  Log(undef, "Version $treeish is commit $commit");
-
-  if ($commit ne $Job->{'script_version'}) {
-    # Record the real commit id in the database, frozentokey, logs,
-    # etc. -- instead of an abbreviation or a branch name which can
-    # become ambiguous or point to a different commit in the future.
-    if (!$Job->update_attributes('script_version' => $commit)) {
-      croak("Error: failed to update job's script_version attribute");
-    }
-  }
-
-  $ENV{"CRUNCH_SRC_COMMIT"} = $commit;
-  add_git_archive("$gitcmd archive ''\Q$commit\E");
-}
-
-my $git_archive = combined_git_archive();
-if (!defined $git_archive) {
-  Log(undef, "Skip install phase (no git archive)");
-  if ($have_slurm) {
-    Log(undef, "Warning: This probably means workers have no source tree!");
-  }
-}
-else {
-  my $exited;
-  my $install_script_tries_left = 3;
-  for (my $attempts = 0; $attempts < 3; $attempts++) {
-    my @srunargs = ("srun",
-                    "--nodelist=$nodelist",
-                    "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
-    my @execargs = ("sh", "-c",
-                    "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
-
-    $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
-    my ($stdout, $stderr, $tempfail);
-    ($exited, $stdout, $stderr, $tempfail) = srun_sync(
-      \@srunargs, \@execargs,
-      {label => "run install script on all workers"},
-        $build_script . $git_archive);
-    if ($tempfail) {
-      exit_retry_unlocked();
-    }
-
-    my $stderr_anything_from_script = 0;
-    for my $line (split(/\n/, $stderr)) {
-      if ($line !~ /^(srun: error: |starting: \[)/) {
-        $stderr_anything_from_script = 1;
-      }
-    }
-
-    last if $exited == 0 || $main::please_freeze;
-
-    # If the install script fails but doesn't print an error message,
-    # the next thing anyone is likely to do is just run it again in
-    # case it was a transient problem like "slurm communication fails
-    # because the network isn't reliable enough". So we'll just do
-    # that ourselves (up to 3 attempts in total). OTOH, if there is an
-    # error message, the problem is more likely to have a real fix and
-    # we should fail the job so the fixing process can start, instead
-    # of doing 2 more attempts.
-    last if $stderr_anything_from_script;
-  }
-
-  foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
-    unlink($tar_filename);
-  }
-
-  if ($exited != 0) {
-    croak("Giving up");
-  }
-}
-
-foreach (qw (script script_version script_parameters runtime_constraints))
-{
-  Log (undef,
-       "$_ " .
-       (ref($Job->{$_}) ? JSON::encode_json($Job->{$_}) : $Job->{$_}));
-}
-foreach (split (/\n/, $Job->{knobs}))
-{
-  Log (undef, "knob " . $_);
-}
-my $resp = api_call(
-  'nodes/list',
-  'filters' => [['hostname', 'in', \@node]],
-  'order' => 'hostname',
-  'limit' => scalar(@node),
-    );
-for my $n (@{$resp->{items}}) {
-  Log(undef, "$n->{hostname} $n->{uuid} ".JSON::encode_json($n->{properties}));
-}
-
-
-
-$main::success = undef;
-
-
-
-ONELEVEL:
-
-my $thisround_succeeded = 0;
-my $thisround_failed = 0;
-my $thisround_failed_multiple = 0;
-my $working_slot_count = scalar(@slot);
-
-@jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level}
-		       or $a <=> $b } @jobstep_todo;
-my $level = $jobstep[$jobstep_todo[0]]->{level};
-
-my $initial_tasks_this_level = 0;
-foreach my $id (@jobstep_todo) {
-  $initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level);
-}
-
-# If the number of tasks scheduled at this level #T is smaller than the number
-# of slots available #S, only use the first #T slots, or the first slot on
-# each node, whichever number is greater.
-#
-# When we dispatch tasks later, we'll allocate whole-node resources like RAM
-# based on these numbers.  Using fewer slots makes more resources available
-# to each individual task, which should normally be a better strategy when
-# there are fewer of them running with less parallelism.
-#
-# Note that this calculation is not redone if the initial tasks at
-# this level queue more tasks at the same level.  This may harm
-# overall task throughput for that level.
-my @freeslot;
-if ($initial_tasks_this_level < @node) {
-  @freeslot = (0..$#node);
-} elsif ($initial_tasks_this_level < @slot) {
-  @freeslot = (0..$initial_tasks_this_level - 1);
-} else {
-  @freeslot = (0..$#slot);
-}
-my $round_num_freeslots = scalar(@freeslot);
-print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
-
-my %round_max_slots = ();
-for (my $ii = $#freeslot; $ii >= 0; $ii--) {
-  my $this_slot = $slot[$freeslot[$ii]];
-  my $node_name = $this_slot->{node}->{name};
-  $round_max_slots{$node_name} ||= $this_slot->{cpu};
-  last if (scalar(keys(%round_max_slots)) >= @node);
-}
-
-Log(undef, "start level $level with $round_num_freeslots slots");
-my @holdslot;
-my %reader;
-my $progress_is_dirty = 1;
-my $progress_stats_updated = 0;
-
-update_progress_stats();
-
-
-THISROUND:
-for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
-{
-  # Don't create new tasks if we already know the job's final result.
-  last if defined($main::success);
-
-  my $id = $jobstep_todo[$todo_ptr];
-  my $Jobstep = $jobstep[$id];
-  if ($Jobstep->{level} != $level)
-  {
-    next;
-  }
-
-  pipe $reader{$id}, "writer" or croak("pipe() failed: $!");
-  set_nonblocking($reader{$id});
-
-  my $childslot = $freeslot[0];
-  my $childnode = $slot[$childslot]->{node};
-  my $childslotname = join (".",
-			    $slot[$childslot]->{node}->{name},
-			    $slot[$childslot]->{cpu});
-
-  my $childpid = fork();
-  if ($childpid == 0)
-  {
-    $SIG{'INT'} = 'DEFAULT';
-    $SIG{'QUIT'} = 'DEFAULT';
-    $SIG{'TERM'} = 'DEFAULT';
-
-    foreach (values (%reader))
-    {
-      close($_);
-    }
-    fcntl ("writer", F_SETFL, 0) or croak ($!); # no close-on-exec
-    open(STDOUT,">&writer") or croak ($!);
-    open(STDERR,">&writer") or croak ($!);
-
-    undef $dbh;
-    undef $sth;
-
-    delete $ENV{"GNUPGHOME"};
-    $ENV{"TASK_UUID"} = $Jobstep->{'arvados_task'}->{'uuid'};
-    $ENV{"TASK_QSEQUENCE"} = $id;
-    $ENV{"TASK_SEQUENCE"} = $level;
-    $ENV{"JOB_SCRIPT"} = $Job->{script};
-    while (my ($param, $value) = each %{$Job->{script_parameters}}) {
-      $param =~ tr/a-z/A-Z/;
-      $ENV{"JOB_PARAMETER_$param"} = $value;
-    }
-    $ENV{"TASK_SLOT_NODE"} = $slot[$childslot]->{node}->{name};
-    $ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
-    $ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
-    $ENV{"HOME"} = $ENV{"TASK_WORK"};
-    $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
-    $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
-    $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
-
-    my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
-
-    $ENV{"GZIP"} = "-n";
-
-    my @srunargs = (
-      "srun",
-      "--nodelist=".$childnode->{name},
-      qw(-n1 -c1 -N1 -D), $ENV{'TMPDIR'},
-      "--job-name=$job_id.$id.$$",
-	);
-
-    my $stdbuf = " stdbuf --output=0 --error=0 ";
-
-    my $arv_file_cache = "";
-    if (defined($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'})) {
-      $arv_file_cache = "--file-cache=" . ($Job->{'runtime_constraints'}->{'keep_cache_mb_per_task'} * 1024 * 1024);
-    }
-
-    my $command =
-	"if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
-        ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
-	."&& cd \Q$ENV{CRUNCH_TMP}\E "
-        # These environment variables get used explicitly later in
-        # $command.  No tool is expected to read these values directly.
-        .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
-        .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
-        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
-        ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
-        .q{&& declare -a VOLUMES=() }
-        .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner:ro") ; fi }
-        .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt:ro") ; }
-        .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt:ro") ; fi };
-
-    $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
-    $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
-    $ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
-
-    if ($docker_hash)
-    {
-      my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
-      my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
-      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
-      $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
-      # We only set memory limits if Docker lets us limit both memory and swap.
-      # Memory limits alone have been supported longer, but subprocesses tend
-      # to get SIGKILL if they exceed that without any swap limit set.
-      # See #5642 for additional background.
-      if ($docker_limitmem) {
-        $command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
-      }
-
-      # The source tree and $destdir directory (which we have
-      # installed on the worker host) are available in the container,
-      # under the same path.
-      $command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
-      $command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
-
-      # Currently, we make the "by_pdh" directory in arv-mount's mount
-      # point appear at /keep inside the container (instead of using
-      # the same path as the host like we do with CRUNCH_SRC and
-      # CRUNCH_INSTALL). However, crunch scripts and utilities must
-      # not rely on this. They must use $TASK_KEEPMOUNT.
-      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
-      $ENV{TASK_KEEPMOUNT} = "/keep";
-
-      # Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
-      $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp\E ";
-      $ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
-
-      # TASK_WORK is almost exactly like a docker data volume: it
-      # starts out empty, is writable, and persists until no
-      # containers use it any more. We don't use --volumes-from to
-      # share it with other containers: it is only accessible to this
-      # task, and it goes away when this task stops.
-      #
-      # However, a docker data volume is writable only by root unless
-      # the mount point already happens to exist in the container with
-      # different permissions. Therefore, we [1] assume /tmp already
-      # exists in the image and is writable by the crunch user; [2]
-      # avoid putting TASK_WORK inside CRUNCH_TMP (which won't be
-      # writable if they are created by docker while setting up the
-      # other --volumes); and [3] create $TASK_WORK inside the
-      # container using $build_script.
-      $command .= "--volume=/tmp ";
-      $ENV{"TASK_WORK"} = "/tmp/crunch-job-task-work/$childslotname";
-      $ENV{"HOME"} = $ENV{"TASK_WORK"};
-      $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
-
-      # TODO: Share a single JOB_WORK volume across all task
-      # containers on a given worker node, and delete it when the job
-      # ends (and, in case that doesn't work, when the next job
-      # starts).
-      #
-      # For now, use the same approach as TASK_WORK above.
-      $ENV{"JOB_WORK"} = "/tmp/crunch-job-work";
-
-      # Bind mount the crunchrunner binary and host TLS certificates file into
-      # the container.
-      $command .= '"${VOLUMES[@]}" ';
-
-      while (my ($env_key, $env_val) = each %ENV)
-      {
-        if ($env_key =~ /^(ARVADOS|CRUNCH|JOB|TASK)_/) {
-          $command .= "--env=\Q$env_key=$env_val\E ";
-        }
-      }
-      $command .= "--env=\QHOME=$ENV{HOME}\E ";
-      $command .= "\Q$docker_hash\E ";
-
-      if ($Job->{arvados_sdk_version}) {
-        $command .= $stdbuf;
-        $command .= "perl - \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E";
-      } else {
-        $command .= "/bin/sh -c \'python -c " .
-            '"from pkg_resources import get_distribution as get; print \"Using Arvados SDK version\", get(\"arvados-python-client\").version"' .
-            ">&2 2>/dev/null; " .
-            "mkdir -p \"$ENV{JOB_WORK}\" \"$ENV{TASK_WORK}\" && " .
-            "if which stdbuf >/dev/null ; then " .
-            "  exec $stdbuf \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
-            " else " .
-            "  exec \Q$ENV{CRUNCH_SRC}/crunch_scripts/$Job->{script}\E ;" .
-            " fi\'";
-      }
-    } else {
-      # Non-docker run
-      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
-      $command .= $stdbuf;
-      $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
-    }
-
-    my @execargs = ('bash', '-c', $command);
-    srun (\@srunargs, \@execargs, undef, $build_script);
-    # exec() failed, we assume nothing happened.
-    die "srun() failed on build script\n";
-  }
-  close("writer");
-  if (!defined $childpid)
-  {
-    close $reader{$id};
-    delete $reader{$id};
-    next;
-  }
-  shift @freeslot;
-  $proc{$childpid} = {
-    jobstepidx => $id,
-    time => time,
-    slot => $childslot,
-    jobstepname => "$job_id.$id.$childpid",
-  };
-  croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
-  $slot[$childslot]->{pid} = $childpid;
-
-  Log ($id, "job_task ".$Jobstep->{'arvados_task'}->{'uuid'});
-  Log ($id, "child $childpid started on $childslotname");
-  $Jobstep->{starttime} = time;
-  $Jobstep->{node} = $childnode->{name};
-  $Jobstep->{slotindex} = $childslot;
-  delete $Jobstep->{stderr};
-  delete $Jobstep->{finishtime};
-  delete $Jobstep->{tempfail};
-
-  $Jobstep->{'arvados_task'}->{started_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{starttime});
-  retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-
-  splice @jobstep_todo, $todo_ptr, 1;
-  --$todo_ptr;
-
-  $progress_is_dirty = 1;
-
-  while (!@freeslot
-	 ||
-	 ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
-  {
-    last THISROUND if $main::please_freeze;
-    if ($main::please_info)
-    {
-      $main::please_info = 0;
-      freeze();
-      create_output_collection();
-      save_meta(1);
-      update_progress_stats();
-    }
-    my $gotsome
-	= readfrompipes ()
-	+ reapchildren ();
-    if (!$gotsome || ($latest_refresh + 2 < scalar time))
-    {
-      check_refresh_wanted();
-      check_squeue();
-      update_progress_stats();
-    }
-    elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
-    {
-      update_progress_stats();
-    }
-    if (!$gotsome) {
-      select (undef, undef, undef, 0.1);
-    }
-    $working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
-                                        $_->{node}->{hold_count} < 4 } @slot);
-    if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
-	($thisround_failed_multiple >= 16 && $thisround_failed_multiple > $thisround_succeeded))
-    {
-      my $message = "Repeated failure rate too high ($thisround_failed_multiple/"
-	  .($thisround_failed+$thisround_succeeded)
-	  .") -- giving up on this round";
-      Log (undef, $message);
-      last THISROUND;
-    }
-
-    # move slots from freeslot to holdslot (or back to freeslot) if necessary
-    for (my $i=$#freeslot; $i>=0; $i--) {
-      if ($slot[$freeslot[$i]]->{node}->{hold_until} > scalar time) {
-	push @holdslot, (splice @freeslot, $i, 1);
-      }
-    }
-    for (my $i=$#holdslot; $i>=0; $i--) {
-      if ($slot[$holdslot[$i]]->{node}->{hold_until} <= scalar time) {
-	push @freeslot, (splice @holdslot, $i, 1);
-      }
-    }
-
-    # give up if no nodes are succeeding
-    if ($working_slot_count < 1) {
-      Log(undef, "Every node has failed -- giving up");
-      last THISROUND;
-    }
-  }
-}
-
-
-push @freeslot, splice @holdslot;
-map { $slot[$freeslot[$_]]->{node}->{losing_streak} = 0 } (0..$#freeslot);
-
-
-Log (undef, "wait for last ".(scalar keys %proc)." children to finish");
-while (%proc)
-{
-  if ($main::please_continue) {
-    $main::please_continue = 0;
-    goto THISROUND;
-  }
-  $main::please_info = 0, freeze(), create_output_collection(), save_meta(1) if $main::please_info;
-  readfrompipes ();
-  if (!reapchildren())
-  {
-    check_refresh_wanted();
-    check_squeue();
-    update_progress_stats();
-    select (undef, undef, undef, 0.1);
-    killem (keys %proc) if $main::please_freeze;
-  }
-}
-
-update_progress_stats();
-freeze_if_want_freeze();
-
-
-if (!defined $main::success)
-{
-  if (!@jobstep_todo) {
-    $main::success = 1;
-  } elsif ($working_slot_count < 1) {
-    save_output_collection();
-    save_meta();
-    exit_retry_unlocked();
-  } elsif ($thisround_succeeded == 0 &&
-           ($thisround_failed == 0 || $thisround_failed > 4)) {
-    my $message = "stop because $thisround_failed tasks failed and none succeeded";
-    Log (undef, $message);
-    $main::success = 0;
-  }
-}
-
-goto ONELEVEL if !defined $main::success;
-
-
-release_allocation();
-freeze();
-my $collated_output = save_output_collection();
-Log (undef, "finish");
-
-my $final_log = save_meta();
-
-my $final_state;
-if ($collated_output && $final_log && $main::success) {
-  $final_state = 'Complete';
-} else {
-  $final_state = 'Failed';
-}
-$Job->update_attributes('state' => $final_state);
-
-exit (($final_state eq 'Complete') ? 0 : 1);
-
-
-
-sub update_progress_stats
-{
-  $progress_stats_updated = time;
-  return if !$progress_is_dirty;
-  my ($todo, $done, $running) = (scalar @jobstep_todo,
-                                 scalar @jobstep_done,
-                                 scalar keys(%proc));
-  $Job->{'tasks_summary'} ||= {};
-  $Job->{'tasks_summary'}->{'todo'} = $todo;
-  $Job->{'tasks_summary'}->{'done'} = $done;
-  $Job->{'tasks_summary'}->{'running'} = $running;
-  $Job->update_attributes('tasks_summary' => $Job->{'tasks_summary'});
-  Log (undef, "status: $done done, $running running, $todo todo");
-  $progress_is_dirty = 0;
-}
-
-
-
-sub reapchildren
-{
-  my $children_reaped = 0;
-  my @successful_task_uuids = ();
-
-  while((my $pid = waitpid (-1, WNOHANG)) > 0)
-  {
-    my $childstatus = $?;
-
-    my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
-                    . "."
-                    . $slot[$proc{$pid}->{slot}]->{cpu});
-    my $jobstepidx = $proc{$pid}->{jobstepidx};
-
-    readfrompipes_after_exit ($jobstepidx);
-
-    $children_reaped++;
-    my $elapsed = time - $proc{$pid}->{time};
-    my $Jobstep = $jobstep[$jobstepidx];
-
-    my $exitvalue = $childstatus >> 8;
-    my $exitinfo = "exit ".exit_status_s($childstatus);
-    $Jobstep->{'arvados_task'}->reload;
-    my $task_success = $Jobstep->{'arvados_task'}->{success};
-
-    Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
-
-    if (!defined $task_success) {
-      # task did not indicate one way or the other --> fail
-      Log($jobstepidx, sprintf(
-            "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
-            exit_status_s($childstatus)));
-      $Jobstep->{'arvados_task'}->{success} = 0;
-      retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-      $task_success = 0;
-    }
-
-    if (!$task_success)
-    {
-      my $temporary_fail;
-      $temporary_fail ||= $Jobstep->{tempfail};
-      $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
-
-      ++$thisround_failed;
-      ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
-
-      # Check for signs of a failed or misconfigured node
-      if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
-          2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
-        # Don't count this against jobstep failure thresholds if this
-        # node is already suspected faulty and srun exited quickly
-        if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
-            $elapsed < 5) {
-          Log ($jobstepidx, "blaming failure on suspect node " .
-               $slot[$proc{$pid}->{slot}]->{node}->{name});
-          $temporary_fail ||= 1;
-        }
-        ban_node_by_slot($proc{$pid}->{slot});
-      }
-
-      Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
-                                ++$Jobstep->{'failures'},
-                                $temporary_fail ? 'temporary' : 'permanent',
-                                $elapsed));
-
-      if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
-        # Give up on this task, and the whole job
-        $main::success = 0;
-      }
-      # Put this task back on the todo queue
-      push @jobstep_todo, $jobstepidx;
-      $Job->{'tasks_summary'}->{'failed'}++;
-    }
-    else # task_success
-    {
-      push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid};
-      ++$thisround_succeeded;
-      $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
-      $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
-      $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
-      push @jobstep_done, $jobstepidx;
-      Log ($jobstepidx, "success in $elapsed seconds");
-    }
-    $Jobstep->{exitcode} = $childstatus;
-    $Jobstep->{finishtime} = time;
-    $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
-    retry_op(sub { $Jobstep->{'arvados_task'}->save; }, "job_tasks.update API");
-    Log ($jobstepidx, sprintf("task output (%d bytes): %s",
-                              length($Jobstep->{'arvados_task'}->{output}),
-                              $Jobstep->{'arvados_task'}->{output}));
-
-    close $reader{$jobstepidx};
-    delete $reader{$jobstepidx};
-    delete $slot[$proc{$pid}->{slot}]->{pid};
-    push @freeslot, $proc{$pid}->{slot};
-    delete $proc{$pid};
-
-    $progress_is_dirty = 1;
-  }
-
-  if (scalar(@successful_task_uuids) > 0)
-  {
-    Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids)));
-    # Load new tasks
-    my $newtask_list = [];
-    my $newtask_results;
-    do {
-      $newtask_results = api_call(
-        "job_tasks/list",
-        'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]],
-        'order' => 'qsequence',
-        'offset' => scalar(@$newtask_list),
-          );
-      push(@$newtask_list, @{$newtask_results->{items}});
-    } while (@{$newtask_results->{items}});
-    Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list)));
-    foreach my $arvados_task (@$newtask_list) {
-      my $jobstep = {
-        'level' => $arvados_task->{'sequence'},
-        'failures' => 0,
-        'arvados_task' => $arvados_task
-      };
-      push @jobstep, $jobstep;
-      push @jobstep_todo, $#jobstep;
-    }
-  }
-
-  return $children_reaped;
-}
-
-sub check_refresh_wanted
-{
-  my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
-  if (@stat &&
-      $stat[9] > $latest_refresh &&
-      # ...and we have actually locked the job record...
-      $job_id eq $Job->{'uuid'}) {
-    $latest_refresh = scalar time;
-    my $Job2 = api_call("jobs/get", uuid => $jobspec);
-    for my $attr ('cancelled_at',
-                  'cancelled_by_user_uuid',
-                  'cancelled_by_client_uuid',
-                  'state') {
-      $Job->{$attr} = $Job2->{$attr};
-    }
-    if ($Job->{'state'} ne "Running") {
-      if ($Job->{'state'} eq "Cancelled") {
-        Log (undef, "Job cancelled at " . $Job->{'cancelled_at'} . " by user " . $Job->{'cancelled_by_user_uuid'});
-      } else {
-        Log (undef, "Job state unexpectedly changed to " . $Job->{'state'});
-      }
-      $main::success = 0;
-      $main::please_freeze = 1;
-    }
-  }
-}
-
-sub check_squeue
-{
-  my $last_squeue_check = $squeue_checked;
-
-  # Do not call `squeue` or check the kill list more than once every
-  # 15 seconds.
-  return if $last_squeue_check > time - 15;
-  $squeue_checked = time;
-
-  # Look for children from which we haven't received stderr data since
-  # the last squeue check. If no such children exist, all procs are
-  # alive and there's no need to even look at squeue.
-  #
-  # As long as the crunchstat poll interval (10s) is shorter than the
-  # squeue check interval (15s) this should make the squeue check an
-  # infrequent event.
-  my $silent_procs = 0;
-  for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
-  {
-    if (!exists($js->{stderr_at}))
-    {
-      $js->{stderr_at} = 0;
-    }
-    if ($js->{stderr_at} < $last_squeue_check)
-    {
-      $silent_procs++;
-    }
-  }
-  return if $silent_procs == 0;
-
-  # use killem() on procs whose killtime is reached
-  while (my ($pid, $procinfo) = each %proc)
-  {
-    my $js = $jobstep[$procinfo->{jobstepidx}];
-    if (exists $procinfo->{killtime}
-        && $procinfo->{killtime} <= time
-        && $js->{stderr_at} < $last_squeue_check)
-    {
-      my $sincewhen = "";
-      if ($js->{stderr_at}) {
-        $sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
-      }
-      Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
-      killem ($pid);
-    }
-  }
-
-  if (!$have_slurm)
-  {
-    # here is an opportunity to check for mysterious problems with local procs
-    return;
-  }
-
-  # Get a list of steps still running.  Note: squeue(1) says --steps
-  # selects a format (which we override anyway) and allows us to
-  # specify which steps we're interested in (which we don't).
-  # Importantly, it also changes the meaning of %j from "job name" to
-  # "step name" and (although this isn't mentioned explicitly in the
-  # docs) switches from "one line per job" mode to "one line per step"
-  # mode. Without it, we'd just get a list of one job, instead of a
-  # list of N steps.
-  my @squeue = `squeue --jobs=\Q$ENV{SLURM_JOB_ID}\E --steps --format='%j' --noheader`;
-  if ($? != 0)
-  {
-    Log(undef, "warning: squeue exit status $? ($!)");
-    return;
-  }
-  chop @squeue;
-
-  # which of my jobsteps are running, according to squeue?
-  my %ok;
-  for my $jobstepname (@squeue)
-  {
-    $ok{$jobstepname} = 1;
-  }
-
-  # Check for child procs >60s old and not mentioned by squeue.
-  while (my ($pid, $procinfo) = each %proc)
-  {
-    if ($procinfo->{time} < time - 60
-        && $procinfo->{jobstepname}
-        && !exists $ok{$procinfo->{jobstepname}}
-        && !exists $procinfo->{killtime})
-    {
-      # According to slurm, this task has ended (successfully or not)
-      # -- but our srun child hasn't exited. First we must wait (30
-      # seconds) in case this is just a race between communication
-      # channels. Then, if our srun child process still hasn't
-      # terminated, we'll conclude some slurm communication
-      # error/delay has caused the task to die without notifying srun,
-      # and we'll kill srun ourselves.
-      $procinfo->{killtime} = time + 30;
-      Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
-    }
-  }
-}
-
-sub check_sinfo
-{
-  # If a node fails in a multi-node "srun" call during job setup, the call
-  # may hang instead of exiting with a nonzero code.  This function checks
-  # "sinfo" for the health of the nodes that were allocated and ensures that
-  # they are all still in the "alloc" state.  If a node that is allocated to
-  # this job is not in "alloc" state, then set please_freeze.
-  #
-  # This is only called from srun_sync() for node configuration.  If a
-  # node fails doing actual work, there are other recovery mechanisms.
-
-  # Do not call `sinfo` more than once every 15 seconds.
-  return if $sinfo_checked > time - 15;
-  $sinfo_checked = time;
-
-  # The output format "%t" means output node states.
-  my @sinfo = `sinfo --nodes=\Q$ENV{SLURM_NODELIST}\E --noheader -o "%t"`;
-  if ($? != 0)
-  {
-    Log(undef, "warning: sinfo exit status $? ($!)");
-    return;
-  }
-  chop @sinfo;
-
-  foreach (@sinfo)
-  {
-    if ($_ ne "alloc" && $_ ne "alloc*") {
-      $main::please_freeze = 1;
-    }
-  }
-}
-
-sub release_allocation
-{
-  if ($have_slurm)
-  {
-    Log (undef, "release job allocation");
-    system "scancel $ENV{SLURM_JOB_ID}";
-  }
-}
-
-
-sub readfrompipes
-{
-  my $gotsome = 0;
-  my %fd_job;
-  my $sel = IO::Select->new();
-  foreach my $jobstepidx (keys %reader)
-  {
-    my $fd = $reader{$jobstepidx};
-    $sel->add($fd);
-    $fd_job{$fd} = $jobstepidx;
-
-    if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
-      $sel->add($stdout_fd);
-      $fd_job{$stdout_fd} = $jobstepidx;
-    }
-  }
-  # select on all reader fds with 0.1s timeout
-  my @ready_fds = $sel->can_read(0.1);
-  foreach my $fd (@ready_fds)
-  {
-    my $buf;
-    if (0 < sysread ($fd, $buf, 65536))
-    {
-      $gotsome = 1;
-      print STDERR $buf if $ENV{CRUNCH_DEBUG};
-
-      my $jobstepidx = $fd_job{$fd};
-      if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
-        $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
-        next;
-      }
-
-      $jobstep[$jobstepidx]->{stderr_at} = time;
-      $jobstep[$jobstepidx]->{stderr} .= $buf;
-
-      # Consume everything up to the last \n
-      preprocess_stderr ($jobstepidx);
-
-      if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
-      {
-        # If we get a lot of stderr without a newline, chop off the
-        # front to avoid letting our buffer grow indefinitely.
-        substr ($jobstep[$jobstepidx]->{stderr},
-                0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
-      }
-    }
-  }
-  return $gotsome;
-}
-
-
-# Consume all full lines of stderr for a jobstep. Everything after the
-# last newline will remain in $jobstep[$jobstepidx]->{stderr} after
-# returning.
-sub preprocess_stderr
-{
-  my $jobstepidx = shift;
-  # slotindex is only defined for children running Arvados job tasks.
-  # Be prepared to handle the undef case (for setup srun calls, etc.).
-  my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
-
-  while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
-    my $line = $1;
-    substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
-    Log ($jobstepidx, "stderr $line");
-    if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/i) {
-      # If the allocation is revoked, we can't possibly continue, so mark all
-      # nodes as failed.  This will cause the overall exit code to be
-      # EX_RETRY_UNLOCKED instead of failure so that crunch_dispatch can re-run
-      # this job.
-      $main::please_freeze = 1;
-      foreach my $st (@slot) {
-        $st->{node}->{fail_count}++;
-      }
-    }
-    elsif ($line =~ /srun: error: .*?\b(Node failure on|Aborting, .*?\bio error\b|cannot communicate with node .* aborting job)/i) {
-      $jobstep[$jobstepidx]->{tempfail} = 1;
-      if (defined($job_slot_index)) {
-        $slot[$job_slot_index]->{node}->{fail_count}++;
-        ban_node_by_slot($job_slot_index);
-      }
-    }
-    elsif ($line =~ /srun: error: (Unable to create job step|.*?: Communication connection failure)/i) {
-      $jobstep[$jobstepidx]->{tempfail} = 1;
-      ban_node_by_slot($job_slot_index) if (defined($job_slot_index));
-    }
-    elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
-      $jobstep[$jobstepidx]->{tempfail} = 1;
-    }
-  }
-}
-
-
-# Read whatever is still available on its stderr+stdout pipes after
-# the given child process has exited.
-sub readfrompipes_after_exit
-{
-  my $jobstepidx = shift;
-
-  # The fact that the child has exited allows some convenient
-  # simplifications: (1) all data must have already been written, so
-  # there's no need to wait for more once sysread returns 0; (2) the
-  # total amount of data available is bounded by the pipe buffer size,
-  # so it's safe to read everything into one string.
-  my $buf;
-  while (0 < sysread ($reader{$jobstepidx}, $buf, 65536)) {
-    $jobstep[$jobstepidx]->{stderr_at} = time;
-    $jobstep[$jobstepidx]->{stderr} .= $buf;
-  }
-  if ($jobstep[$jobstepidx]->{stdout_r}) {
-    while (0 < sysread ($jobstep[$jobstepidx]->{stdout_r}, $buf, 65536)) {
-      $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
-    }
-  }
-  preprocess_stderr ($jobstepidx);
-
-  map {
-    Log ($jobstepidx, "stderr $_");
-  } split ("\n", $jobstep[$jobstepidx]->{stderr});
-  $jobstep[$jobstepidx]->{stderr} = '';
-}
-
-sub fetch_block
-{
-  my $hash = shift;
-  my $keep;
-  if (!open($keep, "-|", "arv-get", "--retries", retry_count(), $hash)) {
-    Log(undef, "fetch_block run error from arv-get $hash: $!");
-    return undef;
-  }
-  my $output_block = "";
-  while (1) {
-    my $buf;
-    my $bytes = sysread($keep, $buf, 1024 * 1024);
-    if (!defined $bytes) {
-      Log(undef, "fetch_block read error from arv-get: $!");
-      $output_block = undef;
-      last;
-    } elsif ($bytes == 0) {
-      # sysread returns 0 at the end of the pipe.
-      last;
-    } else {
-      # some bytes were read into buf.
-      $output_block .= $buf;
-    }
-  }
-  close $keep;
-  if ($?) {
-    Log(undef, "fetch_block arv-get exited " . exit_status_s($?));
-    $output_block = undef;
-  }
-  return $output_block;
-}
-
-# Create a collection by concatenating the output of all tasks (each
-# task's output is either a manifest fragment, a locator for a
-# manifest fragment stored in Keep, or nothing at all). Return the
-# portable_data_hash of the new collection.
-sub create_output_collection
-{
-  Log (undef, "collate");
-
-  my ($child_out, $child_in);
-  # This depends on the python-arvados-python-client package, which needs to be installed
-  # on the machine running crunch-dispatch (typically, the API server).
-  my $pid = open2($child_out, $child_in, '/usr/share/python2.7/dist/python-arvados-python-client/bin/python', '-c', q{
-import arvados
-import sys
-print (arvados.api("v1").collections().
-       create(body={"manifest_text": sys.stdin.read(),
-                    "owner_uuid": sys.argv[2]}).
-       execute(num_retries=int(sys.argv[1]))["portable_data_hash"])
-}, retry_count(), $Job->{owner_uuid});
-
-  my $task_idx = -1;
-  my $manifest_size = 0;
-  for (@jobstep)
-  {
-    ++$task_idx;
-    my $output = $_->{'arvados_task'}->{output};
-    next if (!defined($output));
-    my $next_write;
-    if ($output =~ /^[0-9a-f]{32}(\+\S+)*$/) {
-      $next_write = fetch_block($output);
-    } else {
-      $next_write = $output;
-    }
-    if (defined($next_write)) {
-      if (!defined(syswrite($child_in, $next_write))) {
-        # There's been an error writing.  Stop the loop.
-        # We'll log details about the exit code later.
-        last;
-      } else {
-        $manifest_size += length($next_write);
-      }
-    } else {
-      my $uuid = $_->{'arvados_task'}->{'uuid'};
-      Log (undef, "Error retrieving '$output' output by task $task_idx ($uuid)");
-      $main::success = 0;
-    }
-  }
-  close($child_in);
-  Log(undef, "collated output manifest text to send to API server is $manifest_size bytes with access tokens");
-
-  my $joboutput;
-  my $s = IO::Select->new($child_out);
-  if ($s->can_read(120)) {
-    sysread($child_out, $joboutput, 1024 * 1024);
-    waitpid($pid, 0);
-    if ($?) {
-      Log(undef, "output collection creation exited " . exit_status_s($?));
-      $joboutput = undef;
-    } else {
-      chomp($joboutput);
-    }
-  } else {
-    Log (undef, "timed out while creating output collection");
-    foreach my $signal (2, 2, 2, 15, 15, 9) {
-      kill($signal, $pid);
-      last if waitpid($pid, WNOHANG) == -1;
-      sleep(1);
-    }
-  }
-  close($child_out);
-
-  return $joboutput;
-}
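
The collation loop above distinguishes two task-output forms; a small sketch with made-up values, reusing the regex from that loop:

    # A bare Keep block locator is fetched with fetch_block(); anything else is
    # treated as an inline manifest fragment and written to arv-put as-is.
    my $locator  = "d41d8cd98f00b204e9800998ecf8427e+0";
    my $fragment = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:out.txt\n";
    for my $output ($locator, $fragment) {
        if ($output =~ /^[0-9a-f]{32}(\+\S+)*$/) {
            print "locator: fetch from Keep first\n";
        } else {
            print "manifest fragment: write directly\n";
        }
    }
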
-
-# Calls create_output_collection, logs the result, and returns it.
-# If that was successful, save that as the output in the job record.
-sub save_output_collection {
-  my $collated_output = create_output_collection();
-
-  if (!$collated_output) {
-    Log(undef, "Failed to write output collection");
-  }
-  else {
-    Log(undef, "job output $collated_output");
-    $Job->update_attributes('output' => $collated_output);
-  }
-  return $collated_output;
-}
-
-sub killem
-{
-  foreach (@_)
-  {
-    my $sig = 2;		# SIGINT first
-    if (exists $proc{$_}->{"sent_$sig"} &&
-	time - $proc{$_}->{"sent_$sig"} > 4)
-    {
-      $sig = 15;		# SIGTERM if SIGINT doesn't work
-    }
-    if (exists $proc{$_}->{"sent_$sig"} &&
-	time - $proc{$_}->{"sent_$sig"} > 4)
-    {
-      $sig = 9;			# SIGKILL if SIGTERM doesn't work
-    }
-    if (!exists $proc{$_}->{"sent_$sig"})
-    {
-      Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
-      kill $sig, $_;
-      select (undef, undef, undef, 0.1);
-      if ($sig == 2)
-      {
-	kill $sig, $_;	   # srun wants two SIGINT to really interrupt
-      }
-      $proc{$_}->{"sent_$sig"} = time;
-      $proc{$_}->{"killedafter"} = time - $proc{$_}->{"time"};
-    }
-  }
-}
-
-
-sub fhbits
-{
-  my($bits);
-  for (@_) {
-    vec($bits,fileno($_),1) = 1;
-  }
-  $bits;
-}
-
-
-# Send log output to Keep via arv-put.
-#
-# $log_pipe_in and $log_pipe_out are the input and output filehandles to the arv-put pipe.
-# $log_pipe_out_buf is a string containing all output read from arv-put so far.
-# $log_pipe_out_select is an IO::Select object around $log_pipe_out.
-# $log_pipe_pid is the pid of the arv-put subprocess.
-#
-# The only functions that should access these variables directly are:
-#
-# log_writer_start($logfilename)
-#     Starts an arv-put pipe, reading data on stdin and writing it to
-#     a $logfilename file in an output collection.
-#
-# log_writer_read_output([$timeout])
-#     Read output from $log_pipe_out and append it to $log_pipe_out_buf.
-#     Passes $timeout to the select() call, with a default of 0.01.
-#     Returns the result of the last read() call on $log_pipe_out, or
-#     -1 if read() wasn't called because select() timed out.
-#     Only other log_writer_* functions should need to call this.
-#
-# log_writer_send($txt)
-#     Writes $txt to the output log collection.
-#
-# log_writer_finish()
-#     Closes the arv-put pipe and returns the output that it produces.
-#
-# log_writer_is_active()
-#     Returns a true value if there is currently a live arv-put
-#     process, false otherwise.
-#
-my ($log_pipe_in, $log_pipe_out, $log_pipe_out_buf, $log_pipe_out_select,
-    $log_pipe_pid);
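
A usage sketch of the helpers documented above; the filename is illustrative, and in this script the real callers are Log() and save_meta():

    log_writer_start("crunch-run.txt");   # spawns: arv-put --stream --retries 6 --filename crunch-run.txt -
    log_writer_send("one line of log text\n");
    my $log_manifest = log_writer_finish();   # undef if arv-put failed or timed out
    # log_writer_is_active() is false again once log_writer_finish() has run.
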
-
-sub log_writer_start($)
-{
-  my $logfilename = shift;
-  $log_pipe_pid = open2($log_pipe_out, $log_pipe_in,
-                        'arv-put',
-                        '--stream',
-                        '--retries', '6',
-                        '--filename', $logfilename,
-                        '-');
-  $log_pipe_out_buf = "";
-  $log_pipe_out_select = IO::Select->new($log_pipe_out);
-}
-
-sub log_writer_read_output {
-  my $timeout = shift || 0.01;
-  my $read = -1;
-  while ($read && $log_pipe_out_select->can_read($timeout)) {
-    $read = read($log_pipe_out, $log_pipe_out_buf, 65536,
-                 length($log_pipe_out_buf));
-  }
-  if (!defined($read)) {
-    Log(undef, "error reading log manifest from arv-put: $!");
-  }
-  return $read;
-}
-
-sub log_writer_send($)
-{
-  my $txt = shift;
-  print $log_pipe_in $txt;
-  log_writer_read_output();
-}
-
-sub log_writer_finish()
-{
-  return unless $log_pipe_pid;
-
-  close($log_pipe_in);
-
-  my $logger_failed = 0;
-  my $read_result = log_writer_read_output(600);
-  if ($read_result == -1) {
-    $logger_failed = -1;
-    Log (undef, "timed out reading from 'arv-put'");
-  } elsif ($read_result != 0) {
-    $logger_failed = -2;
-    Log(undef, "failed to read arv-put log manifest to EOF");
-  }
-
-  waitpid($log_pipe_pid, 0);
-  if ($?) {
-    $logger_failed ||= $?;
-    Log(undef, "log_writer_finish: arv-put exited " . exit_status_s($?))
-  }
-
-  close($log_pipe_out);
-  my $arv_put_output = $logger_failed ? undef : $log_pipe_out_buf;
-  $log_pipe_pid = $log_pipe_in = $log_pipe_out = $log_pipe_out_buf =
-      $log_pipe_out_select = undef;
-
-  return $arv_put_output;
-}
-
-sub log_writer_is_active() {
-  return $log_pipe_pid;
-}
-
-sub Log				# ($jobstepidx, $logmessage)
-{
-  my ($jobstepidx, $logmessage) = @_;
-  if ($logmessage =~ /\n/) {
-    for my $line (split (/\n/, $_[1])) {
-      Log ($jobstepidx, $line);
-    }
-    return;
-  }
-  my $fh = select STDERR; $|=1; select $fh;
-  my $task_qseq = '';
-  if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
-    $task_qseq = $jobstepidx;
-  }
-  my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
-  $message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
-  $message .= "\n";
-  my $datetime;
-  if (log_writer_is_active() || -t STDERR) {
-    my @gmtime = gmtime;
-    $datetime = sprintf ("%04d-%02d-%02d_%02d:%02d:%02d",
-			 $gmtime[5]+1900, $gmtime[4]+1, @gmtime[3,2,1,0]);
-  }
-  print STDERR ((-t STDERR) ? ($datetime." ".$message) : $message);
-
-  if (log_writer_is_active()) {
-    log_writer_send($datetime . " " . $message);
-  }
-}
-
-
-sub croak
-{
-  my ($package, $file, $line) = caller;
-  my $message = "@_ at $file line $line\n";
-  Log (undef, $message);
-  release_allocation();
-  freeze() if @jobstep_todo;
-  create_output_collection() if @jobstep_todo;
-  cleanup();
-  save_meta();
-  die;
-}
-
-
-sub cleanup
-{
-  return unless $Job;
-  if ($Job->{'state'} eq 'Cancelled') {
-    $Job->update_attributes('finished_at' => scalar gmtime);
-  } else {
-    $Job->update_attributes('state' => 'Failed');
-  }
-}
-
-
-sub save_meta
-{
-  my $justcheckpoint = shift; # false if this will be the last meta saved
-  return if $justcheckpoint;  # checkpointing is not relevant post-Warehouse.pm
-  return unless log_writer_is_active();
-  my $log_manifest = log_writer_finish();
-  return unless defined($log_manifest);
-
-  if ($Job->{log}) {
-    my $prev_log_coll = api_call("collections/get", uuid => $Job->{log});
-    $log_manifest = $prev_log_coll->{manifest_text} . $log_manifest;
-  }
-
-  my $log_coll = api_call(
-    "collections/create", ensure_unique_name => 1, collection => {
-      manifest_text => $log_manifest,
-      owner_uuid => $Job->{owner_uuid},
-      name => sprintf("Log from %s job %s", $Job->{script}, $Job->{uuid}),
-    });
-  Log(undef, "log collection is " . $log_coll->{portable_data_hash});
-  $Job->update_attributes('log' => $log_coll->{portable_data_hash});
-
-  return $log_coll->{portable_data_hash};
-}
-
-
-sub freeze_if_want_freeze
-{
-  if ($main::please_freeze)
-  {
-    release_allocation();
-    if (@_)
-    {
-      # kill some srun procs before freeze+stop
-      map { $proc{$_} = {} } @_;
-      while (%proc)
-      {
-	killem (keys %proc);
-	select (undef, undef, undef, 0.1);
-	my $died;
-	while (($died = waitpid (-1, WNOHANG)) > 0)
-	{
-	  delete $proc{$died};
-	}
-      }
-    }
-    freeze();
-    create_output_collection();
-    cleanup();
-    save_meta();
-    exit 1;
-  }
-}
-
-
-sub freeze
-{
-  Log (undef, "Freeze not implemented");
-  return;
-}
-
-
-sub thaw
-{
-  croak ("Thaw not implemented");
-}
-
-
-sub freezequote
-{
-  my $s = shift;
-  $s =~ s/\\/\\\\/g;
-  $s =~ s/\n/\\n/g;
-  return $s;
-}
-
-
-sub freezeunquote
-{
-  my $s = shift;
-  $s =~ s{\\(.)}{$1 eq "n" ? "\n" : $1}ge;
-  return $s;
-}
-
-sub srun_sync
-{
-  my $srunargs = shift;
-  my $execargs = shift;
-  my $opts = shift || {};
-  my $stdin = shift;
-
-  my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
-  Log (undef, "$label: start");
-
-  my ($stderr_r, $stderr_w);
-  pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
-
-  my ($stdout_r, $stdout_w);
-  pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
-
-  my $started_srun = scalar time;
-
-  my $srunpid = fork();
-  if ($srunpid == 0)
-  {
-    close($stderr_r);
-    close($stdout_r);
-    fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
-    fcntl($stdout_w, F_SETFL, 0) or croak($!);
-    open(STDERR, ">&", $stderr_w) or croak ($!);
-    open(STDOUT, ">&", $stdout_w) or croak ($!);
-    srun ($srunargs, $execargs, $opts, $stdin);
-    exit (1);
-  }
-  close($stderr_w);
-  close($stdout_w);
-
-  set_nonblocking($stderr_r);
-  set_nonblocking($stdout_r);
-
-  # Add entries to @jobstep and %proc so check_squeue() and
-  # freeze_if_want_freeze() can treat it like a job task process.
-  push @jobstep, {
-    stderr => '',
-    stderr_at => 0,
-    stderr_captured => '',
-    stdout_r => $stdout_r,
-    stdout_captured => '',
-  };
-  my $jobstepidx = $#jobstep;
-  $proc{$srunpid} = {
-    jobstepidx => $jobstepidx,
-  };
-  $reader{$jobstepidx} = $stderr_r;
-
-  while ($srunpid != waitpid ($srunpid, WNOHANG)) {
-    my $busy = readfrompipes();
-    if (!$busy || ($latest_refresh + 2 < scalar time)) {
-      check_refresh_wanted();
-      check_squeue();
-      check_sinfo();
-    }
-    if (!$busy) {
-      select(undef, undef, undef, 0.1);
-    }
-    if (($started_srun + $srun_sync_timeout) < scalar time) {
-      # Exceeded general timeout for "srun_sync" operations, likely
-      # means something got stuck on the remote node.
-      Log(undef, "srun_sync exceeded timeout, will fail.");
-      $main::please_freeze = 1;
-    }
-    killem(keys %proc) if $main::please_freeze;
-  }
-  my $exited = $?;
-
-  readfrompipes_after_exit ($jobstepidx);
-
-  Log (undef, "$label: exit ".exit_status_s($exited));
-
-  close($stdout_r);
-  close($stderr_r);
-  delete $proc{$srunpid};
-  delete $reader{$jobstepidx};
-
-  my $j = pop @jobstep;
-  # If the srun showed signs of tempfail, ensure the caller treats that as a
-  # failure case.
-  if ($main::please_freeze || $j->{tempfail}) {
-    $exited ||= 255;
-  }
-  return ($exited, $j->{stdout_captured}, $j->{stderr_captured}, $j->{tempfail});
-}
-
-
-sub srun
-{
-  my $srunargs = shift;
-  my $execargs = shift;
-  my $opts = shift || {};
-  my $stdin = shift;
-  my $args = $have_slurm ? [@$srunargs, @$execargs] : $execargs;
-
-  $Data::Dumper::Terse = 1;
-  $Data::Dumper::Indent = 0;
-  my $show_cmd = Dumper($args);
-  $show_cmd =~ s/(TOKEN\\*=)[^\s\']+/${1}[...]/g;
-  $show_cmd =~ s/\n/ /g;
-  if ($opts->{fork}) {
-    Log(undef, "starting: $show_cmd");
-  } else {
-    # This is a child process: parent is in charge of reading our
-    # stderr and copying it to Log() if needed.
-    warn "starting: $show_cmd\n";
-  }
-
-  if (defined $stdin) {
-    my $child = open STDIN, "-|";
-    defined $child or die "no fork: $!";
-    if ($child == 0) {
-      print $stdin or die $!;
-      close STDOUT or die $!;
-      exit 0;
-    }
-  }
-
-  return system (@$args) if $opts->{fork};
-
-  exec @$args;
-  warn "ENV size is ".length(join(" ",%ENV));
-  die "exec failed: $!: @$args";
-}
-
-
-sub ban_node_by_slot {
-  # Don't start any new jobsteps on this node for 60 seconds
-  my $slotid = shift;
-  $slot[$slotid]->{node}->{hold_until} = 60 + scalar time;
-  $slot[$slotid]->{node}->{hold_count}++;
-  Log (undef, "backing off node " . $slot[$slotid]->{node}->{name} . " for 60 seconds");
-}
-
-sub must_lock_now
-{
-  my ($lockfile, $error_message) = @_;
-  open L, ">", $lockfile or croak("$lockfile: $!");
-  if (!flock L, LOCK_EX|LOCK_NB) {
-    croak("Can't lock $lockfile: $error_message\n");
-  }
-}
-
-sub find_docker_image {
-  # Given a Keep locator, check to see if it contains a Docker image.
-  # If so, return its stream name and Docker hash.
-  # If not, return undef for both values.
-  my $locator = shift;
-  my ($streamname, $filename);
-  my $image = api_call("collections/get", uuid => $locator);
-  if ($image) {
-    foreach my $line (split(/\n/, $image->{manifest_text})) {
-      my @tokens = split(/\s+/, $line);
-      next if (!@tokens);
-      $streamname = shift(@tokens);
-      foreach my $filedata (grep(/^\d+:\d+:/, @tokens)) {
-        if (defined($filename)) {
-          return (undef, undef);  # More than one file in the Collection.
-        } else {
-          $filename = (split(/:/, $filedata, 3))[2];
-          $filename =~ s/\\([0-3][0-7][0-7])/chr(oct($1))/ge;
-        }
-      }
-    }
-  }
-  if (defined($filename) and ($filename =~ /^((?:sha256:)?[0-9A-Fa-f]{64})\.tar$/)) {
-    return ($streamname, $1);
-  } else {
-    return (undef, undef);
-  }
-}
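
For reference, a sketch of the collection shape find_docker_image() accepts; the locator and digest below are invented:

    # A usable Docker image collection holds exactly one file, named after the
    # image hash, e.g. a manifest like:
    #   . 0123456789abcdef0123456789abcdef+1234 0:1234:sha256:<64 hex digits>.tar
    # In that case the stream name (".") and "sha256:<64 hex digits>" are returned;
    # a collection with more than one file yields (undef, undef).
    my ($streamname, $image_hash) =
        find_docker_image("0123456789abcdef0123456789abcdef+1234");
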
-
-sub exit_retry_unlocked {
-  Log(undef, "Transient failure with lock acquired; asking for re-dispatch by exiting ".EX_RETRY_UNLOCKED);
-  exit(EX_RETRY_UNLOCKED);
-}
-
-sub retry_count {
-  # Calculate the number of times an operation should be retried,
-  # assuming exponential backoff, and that we're willing to retry as
-  # long as tasks have been running.  Enforce a minimum of 3 retries.
-  my ($starttime, $endtime, $timediff, $retries);
-  if (@jobstep) {
-    $starttime = $jobstep[0]->{starttime};
-    $endtime = $jobstep[-1]->{finishtime};
-  }
-  if (!defined($starttime)) {
-    $timediff = 0;
-  } elsif (!defined($endtime)) {
-    $timediff = time - $starttime;
-  } else {
-    $timediff = ($endtime - $starttime) - (time - $endtime);
-  }
-  if ($timediff > 0) {
-    $retries = int(log($timediff) / log(2));
-  } else {
-    $retries = 1;  # Use the minimum.
-  }
-  return ($retries > 3) ? $retries : 3;
-}
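
A worked example of the heuristic above, with a made-up elapsed time:

    # Tasks running for about 20 minutes give timediff of roughly 1200 seconds:
    #   int(log(1200) / log(2)) == 10 retries
    # A timediff of 0 (nothing has run yet) falls back to the minimum of 3.
    printf("%d retries\n", int(log(1200) / log(2)));    # prints "10 retries"
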
-
-sub retry_op {
-  # Pass in two function references.
-  # This method will be called with the remaining arguments.
-  # If it dies, retry it with exponential backoff until it succeeds,
-  # or until the current retry_count is exhausted.  After each failure
-  # that can be retried, the second function will be called with
-  # the current try count (0-based), next try time, and error message.
-  my $operation = shift;
-  my $op_text = shift;
-  my $retries = retry_count();
-  my $retry_callback = sub {
-    my ($try_count, $next_try_at, $errmsg) = @_;
-    $errmsg =~ s/\s*\bat \Q$0\E line \d+\.?\s*//;
-    $errmsg =~ s/\s/ /g;
-    $errmsg =~ s/\s+$//;
-    my $retry_msg;
-    if ($next_try_at < time) {
-      $retry_msg = "Retrying.";
-    } else {
-      my $next_try_fmt = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($next_try_at);
-      $retry_msg = "Retrying at $next_try_fmt.";
-    }
-    Log(undef, "$op_text failed: $errmsg. $retry_msg");
-  };
-  foreach my $try_count (0..$retries) {
-    my $next_try = time + (2 ** $try_count);
-    my $result = eval { $operation->(@_); };
-    if (!$@) {
-      return $result;
-    } elsif ($try_count < $retries) {
-      $retry_callback->($try_count, $next_try, $@);
-      my $sleep_time = $next_try - time;
-      sleep($sleep_time) if ($sleep_time > 0);
-    }
-  }
-  # Ensure the error message ends in a newline, so Perl doesn't add
-  # retry_op's line number to it.
-  chomp($@);
-  die($@ . "\n");
-}
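
retry_op() is invoked elsewhere in this script along the lines of the following sketch; the failing operation here is invented:

    # Retries with 1s, 2s, 4s, ... backoff until retry_count() attempts are used up,
    # logging "<op_text> failed: <error>. Retrying ..." after each retriable failure.
    my $value = retry_op(sub {
        die "transient failure\n" if rand() < 0.5;   # stand-in for a flaky API call
        return "ok";
    }, "illustrative flaky operation");
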
-
-sub api_call {
-  # Pass in a /-separated API method name, and arguments for it.
-  # This function will call that method, retrying as needed until
-  # the current retry_count is exhausted, with a log on the first failure.
-  my $method_name = shift;
-  my $method = $arv;
-  foreach my $key (split(/\//, $method_name)) {
-    $method = $method->{$key};
-  }
-  return retry_op(sub { $method->execute(@_); }, "API method $method_name", @_);
-}
-
-sub exit_status_s {
-  # Given a $?, return a human-readable exit code string like "0" or
-  # "1" or "0 with signal 1" or "1 with signal 11".
-  my $exitcode = shift;
-  my $s = $exitcode >> 8;
-  if ($exitcode & 0x7f) {
-    $s .= " with signal " . ($exitcode & 0x7f);
-  }
-  if ($exitcode & 0x80) {
-    $s .= " with core dump";
-  }
-  return $s;
-}
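
Examples of how exit_status_s() renders a wait status ($?-style values constructed by hand):

    print exit_status_s(0),      "\n";   # "0"    (clean exit)
    print exit_status_s(1 << 8), "\n";   # "1"    (exit code 1)
    print exit_status_s(139),    "\n";   # "0 with signal 11 with core dump"
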
-
-sub handle_readall {
-  # Pass in a glob reference to a file handle.
-  # Read all its contents and return them as a string.
-  my $fh_glob_ref = shift;
-  local $/ = undef;
-  return <$fh_glob_ref>;
-}
-
-sub tar_filename_n {
-  my $n = shift;
-  return sprintf("%s/git.%s.%d.tar", $ENV{CRUNCH_TMP}, $job_id, $n);
-}
-
-sub add_git_archive {
-  # Pass in a git archive command as a string or list, a la system().
-  # This method will save its output to be included in the archive sent to the
-  # build script.
-  my $git_input;
-  $git_tar_count++;
-  if (!open(GIT_ARCHIVE, ">", tar_filename_n($git_tar_count))) {
-    croak("Failed to save git archive: $!");
-  }
-  my $git_pid = open2(">&GIT_ARCHIVE", $git_input, @_);
-  close($git_input);
-  waitpid($git_pid, 0);
-  close(GIT_ARCHIVE);
-  if ($?) {
-    croak("Failed to save git archive: git exited " . exit_status_s($?));
-  }
-}
-
-sub combined_git_archive {
-  # Combine all saved tar archives into a single archive, then return its
-  # contents in a string.  Return undef if no archives have been saved.
-  if ($git_tar_count < 1) {
-    return undef;
-  }
-  my $base_tar_name = tar_filename_n(1);
-  foreach my $tar_to_append (map { tar_filename_n($_); } (2..$git_tar_count)) {
-    my $tar_exit = system("tar", "-Af", $base_tar_name, $tar_to_append);
-    if ($tar_exit != 0) {
-      croak("Error preparing build archive: tar -A exited " .
-            exit_status_s($tar_exit));
-    }
-  }
-  if (!open(GIT_TAR, "<", $base_tar_name)) {
-    croak("Could not open build archive: $!");
-  }
-  my $tar_contents = handle_readall(\*GIT_TAR);
-  close(GIT_TAR);
-  return $tar_contents;
-}
-
-sub set_nonblocking {
-  my $fh = shift;
-  my $flags = fcntl ($fh, F_GETFL, 0) or croak ($!);
-  fcntl ($fh, F_SETFL, $flags | O_NONBLOCK) or croak ($!);
-}
-
-__DATA__
-#!/usr/bin/env perl
-#
-# This is crunch-job's internal dispatch script.  crunch-job running on the API
-# server invokes this script on individual compute nodes, or localhost if we're
-# running a job locally.  It gets called in two modes:
-#
-# * No arguments: Installation mode.  Read a tar archive from the DATA
-#   file handle; it includes the Crunch script's source code, and
-#   maybe SDKs as well.  Those should be installed in the proper
-#   locations.  This runs outside of any Docker container, so don't try to
-#   introspect Crunch's runtime environment.
-#
-# * With arguments: Crunch script run mode.  This script should set up the
-#   environment, then run the command specified in the arguments.  This runs
-#   inside any Docker container.
-
-use Fcntl ':flock';
-use File::Path qw( make_path remove_tree );
-use POSIX qw(getcwd);
-
-use constant TASK_TEMPFAIL => 111;
-
-# Map SDK subdirectories to the path environments they belong to.
-my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");
-
-my $destdir = $ENV{"CRUNCH_SRC"};
-my $archive_hash = $ENV{"CRUNCH_GIT_ARCHIVE_HASH"};
-my $repo = $ENV{"CRUNCH_SRC_URL"};
-my $install_dir = $ENV{"CRUNCH_INSTALL"} || (getcwd() . "/opt");
-my $job_work = $ENV{"JOB_WORK"};
-my $task_work = $ENV{"TASK_WORK"};
-
-open(STDOUT_ORIG, ">&", STDOUT);
-open(STDERR_ORIG, ">&", STDERR);
-
-for my $dir ($destdir, $job_work, $task_work) {
-  if ($dir) {
-    make_path $dir;
-    -e $dir or die "Failed to create temporary directory ($dir): $!";
-  }
-}
-
-if ($task_work) {
-  remove_tree($task_work, {keep_root => 1});
-}
-
-### Crunch script run mode
-if (@ARGV) {
-  # We want to do routine logging during task 0 only.  This gives the user
-  # the information they need, but avoids repeating the information for every
-  # task.
-  my $Log;
-  if ($ENV{TASK_SEQUENCE} eq "0") {
-    $Log = sub {
-      my $msg = shift;
-      printf STDERR_ORIG "[Crunch] $msg\n", @_;
-    };
-  } else {
-    $Log = sub { };
-  }
-
-  my $python_src = "$install_dir/python";
-  my $venv_dir = "$job_work/.arvados.venv";
-  my $venv_built = -e "$venv_dir/bin/activate";
-  if ((!$venv_built) and (-d $python_src) and can_run("virtualenv")) {
-    shell_or_die(undef, "virtualenv", "--quiet", "--system-site-packages",
-                 "--python=python2.7", $venv_dir);
-    shell_or_die(TASK_TEMPFAIL, "$venv_dir/bin/pip", "--quiet", "install", "-I", $python_src);
-    $venv_built = 1;
-    $Log->("Built Python SDK virtualenv");
-  }
-
-  my @pysdk_version_cmd = ("python", "-c",
-    "from pkg_resources import get_distribution as get; print get('arvados-python-client').version");
-  if ($venv_built) {
-    $Log->("Running in Python SDK virtualenv");
-    @pysdk_version_cmd = ();
-    my $orig_argv = join(" ", map { quotemeta($_); } @ARGV);
-    @ARGV = ("/bin/sh", "-ec",
-             ". \Q$venv_dir/bin/activate\E; exec $orig_argv");
-  } elsif (-d $python_src) {
-    $Log->("Warning: virtualenv not found inside Docker container default " .
-           "\$PATH. Can't install Python SDK.");
-  }
-
-  if (@pysdk_version_cmd) {
-    open(my $pysdk_version_pipe, "-|", @pysdk_version_cmd);
-    my $pysdk_version = <$pysdk_version_pipe>;
-    close($pysdk_version_pipe);
-    if ($? == 0) {
-      chomp($pysdk_version);
-      $Log->("Using Arvados SDK version $pysdk_version");
-    } else {
-      # A lot could've gone wrong here, but pretty much all of it means that
-      # Python won't be able to load the Arvados SDK.
-      $Log->("Warning: Arvados SDK not found");
-    }
-  }
-
-  while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
-    my $sdk_path = "$install_dir/$sdk_dir";
-    if (-d $sdk_path) {
-      if ($ENV{$sdk_envkey}) {
-        $ENV{$sdk_envkey} = "$sdk_path:" . $ENV{$sdk_envkey};
-      } else {
-        $ENV{$sdk_envkey} = $sdk_path;
-      }
-      $Log->("Arvados SDK added to %s", $sdk_envkey);
-    }
-  }
-
-  exec(@ARGV);
-  die "Cannot exec `@ARGV`: $!";
-}
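
A sketch of the SDK path handling inside the run-mode block above, with invented directories and a pre-existing PERLLIB:

    my %env = (PERLLIB => "/old/lib");
    my $install_dir = "/opt/crunch";                    # hypothetical
    my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");
    while (my ($sdk_dir, $sdk_envkey) = each(%SDK_ENVVARS)) {
        my $sdk_path = "$install_dir/$sdk_dir";         # the real code also checks -d $sdk_path
        $env{$sdk_envkey} = $env{$sdk_envkey} ? "$sdk_path:$env{$sdk_envkey}" : $sdk_path;
    }
    # Result: PERLLIB=/opt/crunch/perl/lib:/old/lib, RUBYLIB=/opt/crunch/ruby/lib
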
-
-### Installation mode
-open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
-flock L, LOCK_EX;
-if (readlink ("$destdir.archive_hash") eq $archive_hash && -d $destdir) {
-  # This exact git archive (source + arvados sdk) is already installed
-  # here, so there's no need to reinstall it.
-
-  # We must consume our DATA section, though: otherwise the process
-  # feeding it to us will get SIGPIPE.
-  my $buf;
-  while (read(DATA, $buf, 65536)) { }
-
-  exit(0);
-}
-
-unlink "$destdir.archive_hash";
-mkdir $destdir;
-
-do {
-  # Ignore SIGPIPE: we check retval of close() instead. See perlipc(1).
-  local $SIG{PIPE} = "IGNORE";
-  warn "Extracting archive: $archive_hash\n";
-  # --ignore-zeros is necessary sometimes: depending on how much NUL
-  # padding tar -A put on our combined archive (which in turn depends
-  # on the length of the component archives) tar without
-  # --ignore-zeros will exit before consuming stdin and cause close()
-  # to fail on the resulting SIGPIPE.
-  if (!open(TARX, "|-", "tar", "--ignore-zeros", "-xC", $destdir)) {
-    die "Error launching 'tar -xC $destdir': $!";
-  }
-  # If we send too much data to tar in one write (> 4-5 MiB), it stops, and we
-  # get SIGPIPE.  We must feed it data incrementally.
-  my $tar_input;
-  while (read(DATA, $tar_input, 65536)) {
-    print TARX $tar_input;
-  }
-  if(!close(TARX)) {
-    die "'tar -xC $destdir' exited $?: $!";
-  }
-};
-
-mkdir $install_dir;
-
-my $sdk_root = "$destdir/.arvados.sdk/sdk";
-if (-d $sdk_root) {
-  foreach my $sdk_lang (("python",
-                         map { (split /\//, $_, 2)[0]; } keys(%SDK_ENVVARS))) {
-    if (-d "$sdk_root/$sdk_lang") {
-      if (!rename("$sdk_root/$sdk_lang", "$install_dir/$sdk_lang")) {
-        die "Failed to install $sdk_lang SDK: $!";
-      }
-    }
-  }
-}
-
-my $python_dir = "$install_dir/python";
-if ((-d $python_dir) and can_run("python2.7")) {
-  open(my $egg_info_pipe, "-|",
-       "python2.7 \Q$python_dir/setup.py\E egg_info 2>&1 >/dev/null");
-  my @egg_info_errors = <$egg_info_pipe>;
-  close($egg_info_pipe);
-
-  if ($?) {
-    if (@egg_info_errors and (($egg_info_errors[-1] =~ /\bgit\b/) or ($egg_info_errors[-1] =~ /\[Errno 2\]/))) {
-      # egg_info apparently failed because it couldn't ask git for a build tag.
-      # Specify no build tag.
-      open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
-      print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
-      close($pysdk_cfg);
-    } else {
-      my $egg_info_exit = $? >> 8;
-      foreach my $errline (@egg_info_errors) {
-        warn $errline;
-      }
-      warn "python setup.py egg_info failed: exit $egg_info_exit";
-      exit ($egg_info_exit || 1);
-    }
-  }
-}
-
-# Hide messages from the install script (unless it fails: shell_or_die
-# will show $destdir.log in that case).
-open(STDOUT, ">>", "$destdir.log") or die ($!);
-open(STDERR, ">&", STDOUT) or die ($!);
-
-if (-e "$destdir/crunch_scripts/install") {
-    shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir);
-} elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
-    # Old version
-    shell_or_die (undef, "./tests/autotests.sh", $install_dir);
-} elsif (-e "./install.sh") {
-    shell_or_die (undef, "./install.sh", $install_dir);
-}
-
-if ($archive_hash) {
-    unlink "$destdir.archive_hash.new";
-    symlink ($archive_hash, "$destdir.archive_hash.new") or die "$destdir.archive_hash.new: $!";
-    rename ("$destdir.archive_hash.new", "$destdir.archive_hash") or die "$destdir.archive_hash: $!";
-}
-
-close L;
-
-sub can_run {
-  my $command_name = shift;
-  open(my $which, "-|", "which", $command_name) or die ($!);
-  while (<$which>) { }
-  close($which);
-  return ($? == 0);
-}
-
-sub shell_or_die
-{
-  my $exitcode = shift;
-
-  if ($ENV{"DEBUG"}) {
-    print STDERR "@_\n";
-  }
-  if (system (@_) != 0) {
-    my $err = $!;
-    my $code = $?;
-    my $exitstatus = sprintf("exit %d signal %d", $code >> 8, $code & 0x7f);
-    open STDERR, ">&STDERR_ORIG";
-    system ("cat $destdir.log >&2");
-    warn "@_ failed ($err): $exitstatus";
-    if (defined($exitcode)) {
-      exit $exitcode;
-    }
-    else {
-      exit (($code >> 8) || 1);
-    }
-  }
-}
-
-__DATA__

commit bc1947e4aef52fe5f3aebc10dc2ea74cad86672d
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Wed Aug 7 16:39:55 2019 -0400

    15133: API tests passing
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/api/app/controllers/arvados/v1/jobs_controller.rb b/services/api/app/controllers/arvados/v1/jobs_controller.rb
index f6308c528..58a3fd168 100644
--- a/services/api/app/controllers/arvados/v1/jobs_controller.rb
+++ b/services/api/app/controllers/arvados/v1/jobs_controller.rb
@@ -28,13 +28,12 @@ class Arvados::V1::JobsController < ApplicationController
   end
 
   def queue
-    return send_error("Unsupported legacy jobs API",
-                      status: 400)
+    @objects = []
+    index
   end
 
   def queue_size
-    return send_error("Unsupported legacy jobs API",
-                      status: 400)
+    render :json => {:queue_size => 0}
   end
 
   def self._create_requires_parameters
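
As a side note (not part of the patch): with the hunk above, the legacy queue
endpoints stop replying "Unsupported legacy jobs API" (HTTP 400) and instead
report an always-empty queue, as the revised tests later in this commit assert.
A minimal Ruby sketch of what a caller now sees, assuming the usual collection
route for the action (GET /arvados/v1/jobs/queue_size) and placeholder
ARVADOS_API_HOST / ARVADOS_API_TOKEN environment variables (both illustrative,
not taken from this patch):

    require 'net/http'
    require 'json'
    require 'uri'

    # Hypothetical client call against an assumed route; the token header style
    # mirrors the integration tests elsewhere in this series.
    uri = URI("https://#{ENV['ARVADOS_API_HOST']}/arvados/v1/jobs/queue_size")
    req = Net::HTTP::Get.new(uri)
    req['Authorization'] = "OAuth2 #{ENV['ARVADOS_API_TOKEN']}"
    res = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(req) }
    puts JSON.parse(res.body)  # => {"queue_size"=>0} now that the queue is always empty
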
diff --git a/services/api/test/functional/arvados/v1/job_reuse_controller_test.rb b/services/api/test/functional/arvados/v1/job_reuse_controller_test.rb
index d10ab6a71..02c5c6892 100644
--- a/services/api/test/functional/arvados/v1/job_reuse_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/job_reuse_controller_test.rb
@@ -8,370 +8,11 @@ require 'helpers/git_test_helper'
 class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
   fixtures :repositories, :users, :jobs, :links, :collections
 
-  # See git_setup.rb for the commit log for test.git.tar
-  include GitTestHelper
-
   setup do
     @controller = Arvados::V1::JobsController.new
     authorize_with :active
   end
 
-  test "reuse job with no_reuse=false" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          an_integer: '1',
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "reuse job with find_or_create=true" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "no reuse job with null log" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '3'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqq3', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "reuse job with symbolic script_version" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "tag1",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "do not reuse job because no_reuse=true" do
-    post :create, params: {
-      job: {
-        no_reuse: true,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  [false, "false"].each do |whichfalse|
-    test "do not reuse job because find_or_create=#{whichfalse.inspect}" do
-      post :create, params: {
-        job: {
-          script: "hash",
-          script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-          repository: "active/foo",
-          script_parameters: {
-            input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-            an_integer: '1'
-          }
-        },
-        find_or_create: whichfalse
-      }
-      assert_response :success
-      assert_not_nil assigns(:object)
-      new_job = JSON.parse(@response.body)
-      assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-      assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-    end
-  end
-
-  test "do not reuse job because output is not readable by user" do
-    authorize_with :job_reader
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_no_output" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '2'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykppp', new_job['uuid']
-  end
-
-  test "test_reuse_job_range" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        minimum_script_version: "tag1",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "cannot_reuse_job_no_minimum_given_so_must_use_specified_commit" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '077ba2ad3ea24a929091a9e6ce545c93199b8e57', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_different_input" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '2'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_different_version" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '2'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '077ba2ad3ea24a929091a9e6ce545c93199b8e57', new_job['script_version']
-  end
-
-  test "test_can_reuse_job_submitted_nondeterministic" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        },
-        nondeterministic: true
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_past_nondeterministic" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash2",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykyyy', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_no_permission" do
-    authorize_with :spectator
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "test_cannot_reuse_job_excluded" do
-    post :create, params: {
-      job: {
-        no_reuse: false,
-        script: "hash",
-        minimum_script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
-        script_version: "master",
-        repository: "active/foo",
-        exclude_script_versions: ["tag1"],
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_not_equal('4fe459abe02d9b365932b8f5dc419439ab4e2577',
-                     new_job['script_version'])
-  end
-
-  test "cannot reuse job with find_or_create but excluded version" do
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {
-          input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-          an_integer: '1'
-        }
-      },
-      find_or_create: true,
-      minimum_script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
-      exclude_script_versions: ["tag1"],
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_not_equal('4fe459abe02d9b365932b8f5dc419439ab4e2577',
-                     new_job['script_version'])
-  end
-
-  test "cannot reuse job when hash-like branch includes newer commit" do
-    check_new_job_created_from({job: {script_version: "738783"}},
-                               :previous_job_run_superseded_by_hash_branch)
-  end
-
   BASE_FILTERS = {
     'repository' => ['=', 'active/foo'],
     'script' => ['=', 'hash'],
@@ -384,217 +25,6 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
     hash.each_pair.map { |name, filter| [name] + filter }
   end
 
-  test "can reuse a Job based on filters" do
-    filters_hash = BASE_FILTERS.
-      merge('script_version' => ['in git', 'tag1'])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "master",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             }
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "can not reuse a Job based on filters" do
-    filters = filters_from_hash(BASE_FILTERS
-                                  .reject { |k| k == 'script_version' })
-    filters += [["script_version", "in git",
-                 "31ce37fe365b3dc204300a3e4c396ad333ed0556"],
-                ["script_version", "not in git", ["tag1"]]]
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "master",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             }
-           },
-           filters: filters,
-           find_or_create: true,
-         })
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '077ba2ad3ea24a929091a9e6ce545c93199b8e57', new_job['script_version']
-  end
-
-  test "can not reuse a Job based on arbitrary filters" do
-    filters_hash = BASE_FILTERS.
-      merge("created_at" => ["<", "2010-01-01T00:00:00Z"])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             }
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_equal 'zzzzz-8i9sb-cjs4pklxxjykqqq', new_job['uuid']
-    assert_equal '4fe459abe02d9b365932b8f5dc419439ab4e2577', new_job['script_version']
-  end
-
-  test "can reuse a Job with a Docker image" do
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-             runtime_constraints: {
-               docker_image: 'arvados/apitestfixture',
-             }
-           },
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    target_job = jobs(:previous_docker_job_run)
-    [:uuid, :script_version, :docker_image_locator].each do |attr|
-      assert_equal(target_job.send(attr), new_job.send(attr))
-    end
-  end
-
-  test "can reuse a Job with a Docker image hash filter" do
-    filters_hash = BASE_FILTERS.
-      merge("script_version" =>
-              ["=", "4fe459abe02d9b365932b8f5dc419439ab4e2577"],
-            "docker_image_locator" =>
-              ["in docker", links(:docker_image_collection_hash).name])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    target_job = jobs(:previous_docker_job_run)
-    [:uuid, :script_version, :docker_image_locator].each do |attr|
-      assert_equal(target_job.send(attr), new_job.send(attr))
-    end
-  end
-
-  test "reuse Job with Docker image repo+tag" do
-    filters_hash = BASE_FILTERS.
-      merge("script_version" =>
-              ["=", "4fe459abe02d9b365932b8f5dc419439ab4e2577"],
-            "docker_image_locator" =>
-              ["in docker", links(:docker_image_collection_tag2).name])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    target_job = jobs(:previous_docker_job_run)
-    [:uuid, :script_version, :docker_image_locator].each do |attr|
-      assert_equal(target_job.send(attr), new_job.send(attr))
-    end
-  end
-
-  test "new job with unknown Docker image filter" do
-    filters_hash = BASE_FILTERS.
-      merge("docker_image_locator" => ["in docker", "_nonesuchname_"])
-    post(:create, params: {
-           job: {
-             script: "hash",
-             script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "active/foo",
-             script_parameters: {
-               input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-               an_integer: '1'
-             },
-           },
-           filters: filters_from_hash(filters_hash),
-           find_or_create: true,
-         })
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    assert_not_equal(jobs(:previous_docker_job_run).uuid, new_job.uuid)
-  end
-
-  test "don't reuse job using older Docker image of same name" do
-    jobspec = {runtime_constraints: {
-        docker_image: "arvados/apitestfixture",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_ancient_docker_image_job_run)
-  end
-
-  test "reuse job with Docker image that has hash name" do
-    jobspec = {runtime_constraints: {
-        docker_image: "a" * 64,
-      }}
-    check_job_reused_from(jobspec, :previous_docker_job_run)
-  end
-
-  ["repository", "script"].each do |skip_key|
-    test "missing #{skip_key} filter raises an error" do
-      filters = filters_from_hash(BASE_FILTERS.reject { |k| k == skip_key })
-      post(:create, params: {
-             job: {
-               script: "hash",
-               script_version: "master",
-               repository: "active/foo",
-               script_parameters: {
-                 input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-                 an_integer: '1'
-               }
-             },
-             filters: filters,
-             find_or_create: true,
-           })
-      assert_includes(405..599, @response.code.to_i,
-                      "bad status code with missing #{skip_key} filter")
-    end
-  end
-
   test "find Job with script version range" do
     get :index, params: {
       filters: [["repository", "=", "active/foo"],
@@ -673,136 +103,4 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
                     jobs(:previous_docker_job_run).uuid)
   end
 
-  JOB_SUBMIT_KEYS = [:script, :script_parameters, :script_version, :repository]
-  DEFAULT_START_JOB = :previous_job_run
-
-  def create_job_params(params, start_from=DEFAULT_START_JOB)
-    if not params.has_key?(:find_or_create)
-      params[:find_or_create] = true
-    end
-    job_attrs = params.delete(:job) || {}
-    start_job = jobs(start_from)
-    params[:job] = Hash[JOB_SUBMIT_KEYS.map do |key|
-                          [key, start_job.send(key)]
-                        end]
-    params[:job][:runtime_constraints] =
-      job_attrs.delete(:runtime_constraints) || {}
-    { arvados_sdk_version: :arvados_sdk_version,
-      docker_image_locator: :docker_image }.each do |method, constraint_key|
-      if constraint_value = start_job.send(method)
-        params[:job][:runtime_constraints][constraint_key] ||= constraint_value
-      end
-    end
-    params[:job].merge!(job_attrs)
-    params
-  end
-
-  def create_job_from(params, start_from)
-    post(:create, params: create_job_params(params, start_from))
-    assert_response :success
-    new_job = assigns(:object)
-    assert_not_nil new_job
-    new_job
-  end
-
-  def check_new_job_created_from(params, start_from=DEFAULT_START_JOB)
-    start_time = Time.now
-    new_job = create_job_from(params, start_from)
-    assert_operator(start_time, :<=, new_job.created_at)
-    new_job
-  end
-
-  def check_job_reused_from(params, start_from)
-    new_job = create_job_from(params, start_from)
-    assert_equal(jobs(start_from).uuid, new_job.uuid)
-  end
-
-  def check_errors_from(params, start_from=DEFAULT_START_JOB)
-    post(:create, params: create_job_params(params, start_from))
-    assert_includes(405..499, @response.code.to_i)
-    errors = json_response.fetch("errors", [])
-    assert(errors.any?, "no errors assigned from #{params}")
-    refute(errors.any? { |msg| msg =~ /^#<[A-Za-z]+: / },
-           "errors include raw exception: #{errors.inspect}")
-    errors
-  end
-
-  # 1de84a8 is on the b1 branch, after master's tip.
-  test "new job created from unsatisfiable minimum version filter" do
-    filters_hash = BASE_FILTERS.merge("script_version" => ["in git", "1de84a8"])
-    check_new_job_created_from(filters: filters_from_hash(filters_hash))
-  end
-
-  test "new job created from unsatisfiable minimum version parameter" do
-    check_new_job_created_from(minimum_script_version: "1de84a8")
-  end
-
-  test "new job created from unsatisfiable minimum version attribute" do
-    check_new_job_created_from(job: {minimum_script_version: "1de84a8"})
-  end
-
-  test "graceful error from nonexistent minimum version filter" do
-    filters_hash = BASE_FILTERS.merge("script_version" =>
-                                      ["in git", "__nosuchbranch__"])
-    errors = check_errors_from(filters: filters_from_hash(filters_hash))
-    assert(errors.any? { |msg| msg.include? "__nosuchbranch__" },
-           "bad refspec not mentioned in error message")
-  end
-
-  test "graceful error from nonexistent minimum version parameter" do
-    errors = check_errors_from(minimum_script_version: "__nosuchbranch__")
-    assert(errors.any? { |msg| msg.include? "__nosuchbranch__" },
-           "bad refspec not mentioned in error message")
-  end
-
-  test "graceful error from nonexistent minimum version attribute" do
-    errors = check_errors_from(job: {minimum_script_version: "__nosuchbranch__"})
-    assert(errors.any? { |msg| msg.include? "__nosuchbranch__" },
-           "bad refspec not mentioned in error message")
-  end
-
-  test "don't reuse job with older Arvados SDK version specified by branch" do
-    jobspec = {runtime_constraints: {
-        arvados_sdk_version: "master",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_job_run_with_arvados_sdk_version)
-  end
-
-  test "don't reuse job with older Arvados SDK version specified by commit" do
-    jobspec = {runtime_constraints: {
-        arvados_sdk_version: "ca68b24e51992e790f29df5cc4bc54ce1da4a1c2",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_job_run_with_arvados_sdk_version)
-  end
-
-  test "don't reuse job with newer Arvados SDK version specified by commit" do
-    jobspec = {runtime_constraints: {
-        arvados_sdk_version: "436637c87a1d2bdbf4b624008304064b6cf0e30c",
-      }}
-    check_new_job_created_from({job: jobspec},
-                               :previous_job_run_with_arvados_sdk_version)
-  end
-
-  test "reuse job from arvados_sdk_version git filters" do
-    prev_job = jobs(:previous_job_run_with_arvados_sdk_version)
-    filters_hash = BASE_FILTERS.
-      merge("arvados_sdk_version" => ["in git", "commit2"],
-            "docker_image_locator" => ["=", prev_job.docker_image_locator])
-    filters_hash.delete("script_version")
-    params = create_job_params(filters: filters_from_hash(filters_hash))
-    post(:create, params: params)
-    assert_response :success
-    assert_equal(prev_job.uuid, assigns(:object).uuid)
-  end
-
-  test "create new job because of arvados_sdk_version 'not in git' filters" do
-    filters_hash = BASE_FILTERS.reject { |k| k == "script_version" }
-    filters = filters_from_hash(filters_hash)
-    # Allow anything from the root commit, but before commit 2.
-    filters += [["arvados_sdk_version", "in git", "436637c8"],
-                ["arvados_sdk_version", "not in git", "00634b2b"]]
-    check_new_job_created_from(filters: filters)
-  end
 end
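
As a side note (not from the patch): the BASE_FILTERS constant and the
filters_from_hash helper kept above survive the deletion of these tests. A
self-contained Ruby sketch of how that helper flattens a filter hash into the
[[attribute, operator, operand], ...] triples the Jobs API expects (only the
two BASE_FILTERS entries visible in this hunk are reproduced here):

    BASE_FILTERS = {
      'repository' => ['=', 'active/foo'],
      'script'     => ['=', 'hash'],
    }

    # Prepend each attribute name to its [operator, operand] pair.
    def filters_from_hash(hash)
      hash.each_pair.map { |name, filter| [name] + filter }
    end

    p filters_from_hash(BASE_FILTERS)
    # => [["repository", "=", "active/foo"], ["script", "=", "hash"]]
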
diff --git a/services/api/test/functional/arvados/v1/jobs_controller_test.rb b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
index 3803a0dc4..9298f23d5 100644
--- a/services/api/test/functional/arvados/v1/jobs_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/jobs_controller_test.rb
@@ -7,172 +7,6 @@ require 'helpers/git_test_helper'
 
 class Arvados::V1::JobsControllerTest < ActionController::TestCase
 
-  include GitTestHelper
-
-  test "submit a job" do
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {}
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = JSON.parse(@response.body)
-    assert_not_nil new_job['uuid']
-    assert_not_nil new_job['script_version'].match(/^[0-9a-f]{40}$/)
-    assert_equal 0, new_job['priority']
-  end
-
-  test "normalize output and log uuids when creating job" do
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        script_parameters: {},
-        repository: "active/foo",
-        started_at: Time.now,
-        finished_at: Time.now,
-        running: false,
-        success: true,
-        output: 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy',
-        log: 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy'
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    new_job = assigns(:object)
-    assert_equal 'd41d8cd98f00b204e9800998ecf8427e+0', new_job['log']
-    assert_equal 'd41d8cd98f00b204e9800998ecf8427e+0', new_job['output']
-    version = new_job['script_version']
-
-    # Make sure version doesn't get mangled by normalize
-    assert_not_nil version.match(/^[0-9a-f]{40}$/)
-    assert_equal 'master', json_response['supplied_script_version']
-  end
-
-  test "normalize output and log uuids when updating job" do
-    authorize_with :active
-
-    foobar_job = jobs(:foobar)
-
-    new_output = 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy'
-    new_log = 'd41d8cd98f00b204e9800998ecf8427e+0+K@xyzzy'
-    put :update, params: {
-      id: foobar_job['uuid'],
-      job: {
-        output: new_output,
-        log: new_log
-      }
-    }
-
-    updated_job = json_response
-    assert_not_equal foobar_job['log'], updated_job['log']
-    assert_not_equal new_log, updated_job['log']  # normalized during update
-    assert_equal new_log[0,new_log.rindex('+')], updated_job['log']
-    assert_not_equal foobar_job['output'], updated_job['output']
-    assert_not_equal new_output, updated_job['output']  # normalized during update
-    assert_equal new_output[0,new_output.rindex('+')], updated_job['output']
-  end
-
-  test "cancel a running job" do
-    # We need to verify that "cancel" creates a trigger file, so first
-    # let's make sure there is no stale trigger file.
-    begin
-      File.unlink(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger)
-    rescue Errno::ENOENT
-    end
-
-    authorize_with :active
-    put :update, params: {
-      id: jobs(:running).uuid,
-      job: {
-        cancelled_at: 4.day.ago
-      }
-    }
-    assert_response :success
-    assert_not_nil assigns(:object)
-    job = JSON.parse(@response.body)
-    assert_not_nil job['uuid']
-    assert_not_nil job['cancelled_at']
-    assert_not_nil job['cancelled_by_user_uuid']
-    assert_not_nil job['cancelled_by_client_uuid']
-    assert_equal(true, Time.parse(job['cancelled_at']) > 1.minute.ago,
-                 'server should correct bogus cancelled_at ' +
-                 job['cancelled_at'])
-    assert_equal(true,
-                 File.exist?(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger),
-                 'trigger file should be created when job is cancelled')
-  end
-
-  [
-   [:put, :update, {job:{cancelled_at: Time.now}}, :success],
-   [:put, :update, {job:{cancelled_at: nil}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Cancelled'}}, :success],
-   [:put, :update, {job:{state: 'Queued'}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Running'}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Failed'}}, :unprocessable_entity],
-   [:put, :update, {job:{state: 'Complete'}}, :unprocessable_entity],
-   [:post, :cancel, {}, :success],
-  ].each do |http_method, action, params, expected_response|
-    test "cancelled job stays cancelled after #{[http_method, action, params].inspect}" do
-      # We need to verify that "cancel" creates a trigger file, so first
-      # let's make sure there is no stale trigger file.
-      begin
-        File.unlink(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger)
-      rescue Errno::ENOENT
-      end
-
-      authorize_with :active
-      self.send http_method, action, params: { id: jobs(:cancelled).uuid }.merge(params)
-      assert_response expected_response
-      if expected_response == :success
-        job = json_response
-        assert_not_nil job['cancelled_at'], 'job cancelled again using #{attribute}=#{value} did not have cancelled_at value'
-        assert_equal job['state'], 'Cancelled', 'cancelled again job state changed when updated using #{attribute}=#{value}'
-      end
-      # Verify database record still says Cancelled
-      assert_equal 'Cancelled', Job.find(jobs(:cancelled).id).state, 'job was un-cancelled'
-    end
-  end
-
-  test "cancelled job updated to any other state change results in error" do
-    # We need to verify that "cancel" creates a trigger file, so first
-    # let's make sure there is no stale trigger file.
-    begin
-      File.unlink(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger)
-    rescue Errno::ENOENT
-    end
-
-    authorize_with :active
-    put :update, params: {
-      id: jobs(:running_cancelled).uuid,
-      job: {
-        cancelled_at: nil
-      }
-    }
-    assert_response 422
-  end
-
-  ['abc.py', 'hash.py'].each do |script|
-    test "update job script attribute to #{script} without failing script_version check" do
-      authorize_with :admin
-      put :update, params: {
-        id: jobs(:uses_nonexistent_script_version).uuid,
-        job: {
-          script: script
-        }
-      }
-      assert_response :success
-      resp = assigns(:object)
-      assert_equal jobs(:uses_nonexistent_script_version).script_version, resp['script_version']
-    end
-  end
-
   test "search jobs by uuid with >= query" do
     authorize_with :active
     get :index, params: {
@@ -331,52 +165,12 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     assert_response 422
   end
 
-  test "finish a job" do
-    authorize_with :active
-    put :update, params: {
-      id: jobs(:nearly_finished_job).uuid,
-      job: {
-        output: '551392cc37a317abf865b95f66f4ef94+101',
-        log: '9215de2a951a721f5f156bc08cf63ad7+93',
-        tasks_summary: {done: 1, running: 0, todo: 0, failed: 0},
-        success: true,
-        running: false,
-        finished_at: Time.now.to_s
-      }
-    }
-    assert_response :success
-  end
-
   [:spectator, :admin].each_with_index do |which_token, i|
     test "get job queue as #{which_token} user" do
       authorize_with which_token
       get :queue
       assert_response :success
-      assert_equal i, assigns(:objects).count
-    end
-  end
-
-  test "get job queue as with a = filter" do
-    authorize_with :admin
-    get :queue, params: { filters: [['script','=','foo']] }
-    assert_response :success
-    assert_equal ['foo'], assigns(:objects).collect(&:script).uniq
-    assert_equal 0, assigns(:objects)[0].queue_position
-  end
-
-  test "get job queue as with a != filter" do
-    authorize_with :admin
-    get :queue, params: { filters: [['script','!=','foo']] }
-    assert_response :success
-    assert_equal 0, assigns(:objects).count
-  end
-
-  [:spectator, :admin].each do |which_token|
-    test "get queue_size as #{which_token} user" do
-      authorize_with which_token
-      get :queue_size
-      assert_response :success
-      assert_equal 1, JSON.parse(@response.body)["queue_size"]
+      assert_equal 0, assigns(:objects).count
     end
   end
 
@@ -387,67 +181,6 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     assert_equal([nodes(:busy).uuid], json_response["node_uuids"])
   end
 
-  test "job lock success" do
-    authorize_with :active
-    post :lock, params: {id: jobs(:queued).uuid}
-    assert_response :success
-    job = Job.where(uuid: jobs(:queued).uuid).first
-    assert_equal "Running", job.state
-  end
-
-  test "job lock conflict" do
-    authorize_with :active
-    post :lock, params: {id: jobs(:running).uuid}
-    assert_response 422 # invalid state transition
-  end
-
-  test 'reject invalid commit in remote repository' do
-    authorize_with :active
-    url = "http://localhost:1/fake/fake.git"
-    fetch_remote_from_local_repo url, :foo
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "abc123",
-        repository: url,
-        script_parameters: {}
-      }
-    }
-    assert_response 422
-  end
-
-  test 'tag remote commit in internal repository' do
-    authorize_with :active
-    url = "http://localhost:1/fake/fake.git"
-    fetch_remote_from_local_repo url, :foo
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: url,
-        script_parameters: {}
-      }
-    }
-    assert_response :success
-    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
-                 internal_tag(json_response['uuid']))
-  end
-
-  test 'tag local commit in internal repository' do
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {}
-      }
-    }
-    assert_response :success
-    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
-                 internal_tag(json_response['uuid']))
-  end
-
   test 'get job with components' do
     authorize_with :active
     get :show, params: {id: jobs(:running_job_with_components).uuid}
@@ -455,42 +188,4 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     assert_not_nil json_response["components"]
     assert_equal ["component1", "component2"], json_response["components"].keys
   end
-
-  [
-    [:active, :success],
-    [:system_user, :success],
-    [:admin, 403],
-  ].each do |user, expected|
-    test "add components to job locked by active user as #{user} user and expect #{expected}" do
-      authorize_with user
-      put :update, params: {
-        id: jobs(:running).uuid,
-        job: {
-          components: {"component1" => "value1", "component2" => "value2"}
-        }
-      }
-      assert_response expected
-      if expected == :success
-        assert_not_nil json_response["components"]
-        keys = json_response["components"].keys
-        assert_equal ["component1", "component2"], keys
-        assert_equal "value1", json_response["components"][keys[0]]
-      end
-    end
-  end
-
-  test 'jobs.create disabled in config' do
-    Rails.configuration.API.DisabledAPIs = {"jobs.create"=>{},
-                                               "pipeline_instances.create"=>{}}
-    authorize_with :active
-    post :create, params: {
-      job: {
-        script: "hash",
-        script_version: "master",
-        repository: "active/foo",
-        script_parameters: {}
-      }
-    }
-    assert_response 404
-  end
 end
diff --git a/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb b/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb
index a76151150..e455354c1 100644
--- a/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb
@@ -5,48 +5,4 @@
 require 'test_helper'
 
 class Arvados::V1::PipelineInstancesControllerTest < ActionController::TestCase
-
-  test 'create pipeline with components copied from template' do
-    authorize_with :active
-    post :create, params: {
-      pipeline_instance: {
-        pipeline_template_uuid: pipeline_templates(:two_part).uuid
-      }
-    }
-    assert_response :success
-    assert_equal(pipeline_templates(:two_part).components.to_json,
-                 assigns(:object).components.to_json)
-  end
-
-  test 'create pipeline with no template' do
-    authorize_with :active
-    post :create, params: {
-      pipeline_instance: {
-        components: {}
-      }
-    }
-    assert_response :success
-    assert_equal({}, assigns(:object).components)
-  end
-
-  [
-    true,
-    false
-  ].each do |cascade|
-    test "cancel a pipeline instance with cascade=#{cascade}" do
-      authorize_with :active
-      pi_uuid = pipeline_instances(:job_child_pipeline_with_components_at_level_2).uuid
-
-      post :cancel, params: {id: pi_uuid, cascade: cascade}
-      assert_response :success
-
-      pi = PipelineInstance.where(uuid: pi_uuid).first
-      assert_equal "Paused", pi.state
-
-      children = Job.where(uuid: ['zzzzz-8i9sb-job1atlevel3noc', 'zzzzz-8i9sb-job2atlevel3noc'])
-      children.each do |child|
-        assert_equal ("Cancelled" == child.state), cascade
-      end
-    end
-  end
 end
diff --git a/services/api/test/integration/crunch_dispatch_test.rb b/services/api/test/integration/crunch_dispatch_test.rb
deleted file mode 100644
index 6ac127087..000000000
--- a/services/api/test/integration/crunch_dispatch_test.rb
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'test_helper'
-require 'helpers/git_test_helper'
-
-class CrunchDispatchIntegrationTest < ActionDispatch::IntegrationTest
-  include GitTestHelper
-
-  fixtures :all
-
-  @@crunch_dispatch_pid = nil
-
-  def launch_crunch_dispatch
-    @@crunch_dispatch_pid = Process.fork {
-      ENV['PATH'] = ENV['HOME'] + '/arvados/services/crunch:' + ENV['PATH']
-      exec(ENV['HOME'] + '/arvados/services/api/script/crunch-dispatch.rb')
-    }
-  end
-
-  teardown do
-    if @@crunch_dispatch_pid
-      Process.kill "TERM", @@crunch_dispatch_pid
-      Process.wait
-      @@crunch_dispatch_pid = nil
-    end
-  end
-
-  test "job runs" do
-    post "/arvados/v1/jobs",
-      params: {
-        format: "json",
-        job: {
-          script: "log",
-          repository: "active/crunchdispatchtest",
-          script_version: "f35f99b7d32bac257f5989df02b9f12ee1a9b0d6",
-          script_parameters: {
-            input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-            an_integer: '1'
-          }
-        }
-      },
-      headers: auth(:admin)
-    assert_response :success
-  end
-end
diff --git a/services/api/test/integration/jobs_api_test.rb b/services/api/test/integration/jobs_api_test.rb
index f5fb920b4..76d4fff59 100644
--- a/services/api/test/integration/jobs_api_test.rb
+++ b/services/api/test/integration/jobs_api_test.rb
@@ -5,87 +5,4 @@
 require 'test_helper'
 
 class JobsApiTest < ActionDispatch::IntegrationTest
-  fixtures :all
-
-  test "cancel job" do
-    post "/arvados/v1/jobs/#{jobs(:running).uuid}/cancel",
-      params: {:format => :json},
-      headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:active).api_token}"}
-    assert_response :success
-    assert_equal "arvados#job", json_response['kind']
-    assert_not_nil json_response['cancelled_at']
-  end
-
-  test "cancel someone else's visible job" do
-    post "/arvados/v1/jobs/#{jobs(:runningbarbaz).uuid}/cancel",
-      params: {:format => :json},
-      headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:spectator).api_token}"}
-    assert_response 403
-  end
-
-  test "cancel someone else's invisible job" do
-    post "/arvados/v1/jobs/#{jobs(:running).uuid}/cancel",
-      params: {:format => :json},
-      headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:spectator).api_token}"}
-    assert_response 404
-  end
-
-  test "task qsequence values automatically increase monotonically" do
-    post_args = ["/arvados/v1/job_tasks",
-                 params: {job_task: {
-                     job_uuid: jobs(:running).uuid,
-                     sequence: 1,
-                   }},
-                 headers: auth(:active)]
-    last_qsequence = -1
-    (1..3).each do |task_num|
-      @response = nil
-      post(*post_args)
-      assert_response :success
-      qsequence = json_response["qsequence"]
-      assert_not_nil(qsequence, "task not assigned qsequence")
-      assert_operator(qsequence, :>, last_qsequence,
-                      "qsequence did not increase between tasks")
-      last_qsequence = qsequence
-    end
-  end
-
-  test 'get_delete components_get again for job with components' do
-    authorize_with :active
-    get "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}",
-      headers: auth(:active)
-    assert_response 200
-    assert_not_nil json_response["components"]
-    assert_equal ["component1", "component2"], json_response["components"].keys
-
-    # delete second component
-    put "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}", params: {
-      job: {
-        components: {"component1" => "zzzzz-8i9sb-jobuuid00000001"}
-      },
-      limit: 1000
-    }, headers: auth(:active)
-    assert_response 200
-
-    get "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}",
-      headers: auth(:active)
-    assert_response 200
-    assert_not_nil json_response["components"]
-    assert_equal ["component1"], json_response["components"].keys
-
-    # delete all components
-    put "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}", params: {
-      job: {
-        components: nil
-      },
-      limit: 1000
-    }, headers: auth(:active)
-    assert_response 200
-
-    get "/arvados/v1/jobs/#{jobs(:running_job_with_components).uuid}",
-      headers: auth(:active)
-    assert_response 200
-    assert_not_nil json_response["components"]
-    assert_equal [], json_response["components"].keys
-  end
 end
diff --git a/services/api/test/integration/pipeline_test.rb b/services/api/test/integration/pipeline_test.rb
index d4f7eba30..4d8f88248 100644
--- a/services/api/test/integration/pipeline_test.rb
+++ b/services/api/test/integration/pipeline_test.rb
@@ -5,40 +5,4 @@
 require 'test_helper'
 
 class PipelineIntegrationTest < ActionDispatch::IntegrationTest
-  # These tests simulate the workflow of arv-run-pipeline-instance
-  # and other pipeline-running code.
-
-  def check_component_match(comp_key, comp_hash)
-    assert_response :success
-    built_json = json_response
-    built_component = built_json["components"][comp_key]
-    comp_hash.each_pair do |key, expected|
-      assert_equal(expected, built_component[key.to_s],
-                   "component's #{key} field changed")
-    end
-  end
-
-  test "creating a pipeline instance preserves required component parameters" do
-    comp_name = "test_component"
-    component = {
-      repository: "test_repo",
-      script: "test_script",
-      script_version: "test_refspec",
-      script_parameters: {},
-    }
-
-    post("/arvados/v1/pipeline_instances",
-      params: {
-        pipeline_instance: {
-          components: {comp_name => component}
-        }.to_json
-      },
-      headers: auth(:active))
-    check_component_match(comp_name, component)
-    pi_uuid = json_response["uuid"]
-
-    @response = nil
-    get("/arvados/v1/pipeline_instances/#{pi_uuid}", params: {}, headers: auth(:active))
-    check_component_match(comp_name, component)
-  end
 end
diff --git a/services/api/test/integration/serialized_encoding_test.rb b/services/api/test/integration/serialized_encoding_test.rb
index 16d43e6f3..f41c033b3 100644
--- a/services/api/test/integration/serialized_encoding_test.rb
+++ b/services/api/test/integration/serialized_encoding_test.rb
@@ -15,31 +15,10 @@ class SerializedEncodingTest < ActionDispatch::IntegrationTest
 
     human: {properties: {eye_color: 'gray'}},
 
-    job: {
-      repository: 'active/foo',
-      runtime_constraints: {docker_image: 'arvados/apitestfixture'},
-      script: 'hash',
-      script_version: 'master',
-      script_parameters: {pattern: 'foobar'},
-      tasks_summary: {todo: 0},
-    },
-
-    job_task: {parameters: {pattern: 'foo'}},
-
     link: {link_class: 'test', name: 'test', properties: {foo: :bar}},
 
     node: {info: {uptime: 1234}},
 
-    pipeline_instance: {
-      components: {"job1" => {parameters: {pattern: "xyzzy"}}},
-      components_summary: {todo: 0},
-      properties: {test: true},
-    },
-
-    pipeline_template: {
-      components: {"job1" => {parameters: {pattern: "xyzzy"}}},
-    },
-
     specimen: {properties: {eye_color: 'meringue'}},
 
     trait: {properties: {eye_color: 'brown'}},
diff --git a/services/api/test/unit/crunch_dispatch_test.rb b/services/api/test/unit/crunch_dispatch_test.rb
deleted file mode 100644
index 3a8f90a66..000000000
--- a/services/api/test/unit/crunch_dispatch_test.rb
+++ /dev/null
@@ -1,218 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'test_helper'
-require 'crunch_dispatch'
-require 'helpers/git_test_helper'
-
-class CrunchDispatchTest < ActiveSupport::TestCase
-  include GitTestHelper
-
-  test 'choose cheaper nodes first' do
-    act_as_system_user do
-      # Replace test fixtures with a set suitable for testing dispatch
-      Node.destroy_all
-
-      # Idle nodes with different prices
-      [['compute1', 3.20, 32],
-       ['compute2', 1.60, 16],
-       ['compute3', 0.80, 8]].each do |hostname, price, cores|
-        Node.create!(hostname: hostname,
-                     info: {
-                       'slurm_state' => 'idle',
-                     },
-                     properties: {
-                       'cloud_node' => {
-                         'price' => price,
-                       },
-                       'total_cpu_cores' => cores,
-                       'total_ram_mb' => cores*1024,
-                       'total_scratch_mb' => cores*10000,
-                     })
-      end
-
-      # Node with no price information
-      Node.create!(hostname: 'compute4',
-                   info: {
-                     'slurm_state' => 'idle',
-                   },
-                   properties: {
-                     'total_cpu_cores' => 8,
-                     'total_ram_mb' => 8192,
-                     'total_scratch_mb' => 80000,
-                   })
-
-      # Cheap but busy node
-      Node.create!(hostname: 'compute5',
-                   info: {
-                     'slurm_state' => 'alloc',
-                   },
-                   properties: {
-                     'cloud_node' => {
-                       'price' => 0.10,
-                     },
-                     'total_cpu_cores' => 32,
-                     'total_ram_mb' => 32768,
-                     'total_scratch_mb' => 320000,
-                   })
-    end
-
-    dispatch = CrunchDispatch.new
-    [[1, 16384, ['compute2']],
-     [2, 16384, ['compute2', 'compute1']],
-     [2, 8000, ['compute4', 'compute3']],
-    ].each do |min_nodes, min_ram, expect_nodes|
-      job = Job.new(uuid: 'zzzzz-8i9sb-382lhiizavzhqlp',
-                    runtime_constraints: {
-                      'min_nodes' => min_nodes,
-                      'min_ram_mb_per_node' => min_ram,
-                    })
-      nodes = dispatch.nodes_available_for_job_now job
-      assert_equal expect_nodes, nodes
-    end
-  end
-
-  test 'respond to TERM' do
-    lockfile = Rails.root.join 'tmp', 'dispatch.lock'
-    ENV['CRUNCH_DISPATCH_LOCKFILE'] = lockfile.to_s
-    begin
-      pid = Process.fork do
-        begin
-          dispatch = CrunchDispatch.new
-          dispatch.stubs(:did_recently).returns true
-          dispatch.run []
-        ensure
-          Process.exit!
-        end
-      end
-      assert_with_timeout 5, "Dispatch did not lock #{lockfile}" do
-        !can_lock(lockfile)
-      end
-    ensure
-      Process.kill("TERM", pid)
-    end
-    assert_with_timeout 20, "Dispatch did not unlock #{lockfile}" do
-      can_lock(lockfile)
-    end
-  end
-
-  test 'override --cgroup-root with CRUNCH_CGROUP_ROOT' do
-    ENV['CRUNCH_CGROUP_ROOT'] = '/path/to/cgroup'
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = "none"
-    act_as_system_user do
-      j = Job.create(repository: 'active/foo',
-                     script: 'hash',
-                     script_version: '4fe459abe02d9b365932b8f5dc419439ab4e2577',
-                     script_parameters: {})
-      ok = false
-      Open3.expects(:popen3).at_least_once.with do |*args|
-        if args.index(j.uuid)
-          ok = ((i = args.index '--cgroup-root') and
-                (args[i+1] == '/path/to/cgroup'))
-        end
-        true
-      end.raises(StandardError.new('all is well'))
-      dispatch = CrunchDispatch.new
-      dispatch.parse_argv ['--jobs']
-      dispatch.refresh_todo
-      dispatch.start_jobs
-      assert ok
-    end
-  end
-
-  def assert_with_timeout timeout, message
-    t = 0
-    while (t += 0.1) < timeout
-      if yield
-        return
-      end
-      sleep 0.1
-    end
-    assert false, message + " (waited #{timeout} seconds)"
-  end
-
-  def can_lock lockfile
-    lockfile.open(File::RDWR|File::CREAT, 0644) do |f|
-      return f.flock(File::LOCK_EX|File::LOCK_NB)
-    end
-  end
-
-  test 'rate limit of partial line segments' do
-    act_as_system_user do
-      Rails.configuration.Containers.Logging.LogPartialLineThrottlePeriod = 1
-
-      job = {}
-      job[:bytes_logged] = 0
-      job[:log_throttle_bytes_so_far] = 0
-      job[:log_throttle_lines_so_far] = 0
-      job[:log_throttle_bytes_skipped] = 0
-      job[:log_throttle_is_open] = true
-      job[:log_throttle_partial_line_last_at] = Time.new(0)
-      job[:log_throttle_first_partial_line] = true
-
-      dispatch = CrunchDispatch.new
-
-      line = "first log line"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_equal "first log line", line
-      assert_equal 1, job[:log_throttle_lines_so_far]
-
-      # first partial line segment is skipped and counted towards skipped lines
-      now = Time.now.strftime('%Y-%m-%d-%H:%M:%S')
-      line = "#{now} localhost 100 0 stderr [...] this is first partial line segment [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_includes line, "Rate-limiting partial segments of long lines", line
-      assert_equal 2, job[:log_throttle_lines_so_far]
-
-      # next partial line segment within throttle interval is skipped but not counted towards skipped lines
-      line = "#{now} localhost 100 0 stderr [...] second partial line segment within the interval [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal false, limit
-      assert_equal 2, job[:log_throttle_lines_so_far]
-
-      # next partial line after interval is counted towards skipped lines
-      sleep(1)
-      line = "#{now} localhost 100 0 stderr [...] third partial line segment after the interval [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal false, limit
-      assert_equal 3, job[:log_throttle_lines_so_far]
-
-      # this is not a valid line segment
-      line = "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_equal "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment", line
-      assert_equal 4, job[:log_throttle_lines_so_far]
-
-      # this also is not a valid line segment
-      line = "#{now} localhost 100 0 stderr does not start correctly but ends with [...]"
-      limit = dispatch.rate_limit(job, line)
-      assert_equal true, limit
-      assert_equal "#{now} localhost 100 0 stderr does not start correctly but ends with [...]", line
-      assert_equal 5, job[:log_throttle_lines_so_far]
-    end
-  end
-
-  test 'scancel orphaned job nodes' do
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = "slurm_immediate"
-    act_as_system_user do
-      dispatch = CrunchDispatch.new
-
-      squeue_resp = IO.popen("echo zzzzz-8i9sb-pshmckwoma9plh7\necho thisisnotvalidjobuuid\necho zzzzz-8i9sb-4cf0abc123e809j\necho zzzzz-dz642-o04e3r651turtdr\n")
-      scancel_resp = IO.popen("true")
-
-      IO.expects(:popen).
-        with(['squeue', '-a', '-h', '-o', '%j']).
-        returns(squeue_resp)
-
-      IO.expects(:popen).
-        with(dispatch.sudo_preface + ['scancel', '-n', 'zzzzz-8i9sb-4cf0abc123e809j']).
-        returns(scancel_resp)
-
-      dispatch.check_orphaned_slurm_jobs
-    end
-  end
-end
diff --git a/services/api/test/unit/fail_jobs_test.rb b/services/api/test/unit/fail_jobs_test.rb
deleted file mode 100644
index 304335c6f..000000000
--- a/services/api/test/unit/fail_jobs_test.rb
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'test_helper'
-require 'crunch_dispatch'
-
-class FailJobsTest < ActiveSupport::TestCase
-  include DbCurrentTime
-
-  BOOT_TIME = 1448378837
-
-  setup do
-    @job = {}
-    act_as_user users(:admin) do
-      @job[:before_reboot] = Job.create!(state: 'Running',
-                                         running: true,
-                                         started_at: Time.at(BOOT_TIME - 300))
-      @job[:after_reboot] = Job.create!(state: 'Running',
-                                        running: true,
-                                        started_at: Time.at(BOOT_TIME + 300))
-      @job[:complete] = Job.create!(state: 'Running',
-                                    running: true,
-                                    started_at: Time.at(BOOT_TIME - 300))
-      @job[:complete].update_attributes(state: 'Complete')
-      @job[:complete].update_attributes(finished_at: Time.at(BOOT_TIME + 100))
-      @job[:queued] = jobs(:queued)
-
-      @job.values.each do |job|
-        # backdate timestamps
-        Job.where(uuid: job.uuid).
-          update_all(created_at: Time.at(BOOT_TIME - 330),
-                     modified_at: (job.finished_at ||
-                                   job.started_at ||
-                                   Time.at(BOOT_TIME - 300)))
-      end
-    end
-    @dispatch = CrunchDispatch.new
-    @test_start_time = db_current_time
-  end
-
-  test 'cancel slurm jobs' do
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = "slurm_immediate"
-    Rails.configuration.Containers.JobsAPI.CrunchJobUser = 'foobar'
-    fake_squeue = IO.popen("echo #{@job[:before_reboot].uuid}")
-    fake_scancel = IO.popen("true")
-    IO.expects(:popen).
-      with(['squeue', '-a', '-h', '-o', '%j']).
-      returns(fake_squeue)
-    IO.expects(:popen).
-      with(includes('sudo', '-u', 'foobar', 'scancel', '-n', @job[:before_reboot].uuid)).
-      returns(fake_scancel)
-    @dispatch.fail_jobs(before: Time.at(BOOT_TIME).to_s)
-    assert_end_states
-  end
-
-  test 'use reboot time' do
-    Rails.configuration.Containers.JobsAPI.CrunchJobWrapper = nil
-    @dispatch.expects(:open).once.with('/proc/stat').
-      returns open(Rails.root.join('test/fixtures/files/proc_stat'))
-    @dispatch.fail_jobs(before: 'reboot')
-    assert_end_states
-  end
-
-  test 'command line help' do
-    cmd = Rails.root.join('script/fail-jobs.rb').to_s
-    assert_match(/Options:.*--before=/m, File.popen([cmd, '--help']).read)
-  end
-
-  protected
-
-  def assert_end_states
-    @job.values.map(&:reload)
-    assert_equal 'Failed', @job[:before_reboot].state
-    assert_equal false, @job[:before_reboot].running
-    assert_equal false, @job[:before_reboot].success
-    assert_operator @job[:before_reboot].finished_at, :>=, @test_start_time
-    assert_operator @job[:before_reboot].finished_at, :<=, db_current_time
-    assert_equal 'Running', @job[:after_reboot].state
-    assert_equal 'Complete', @job[:complete].state
-    assert_equal 'Queued', @job[:queued].state
-  end
-end

commit 4d56f9b913fcf41fbf89bf5016463b5353fa3a9f
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Aug 6 15:20:47 2019 -0400

    15133: Delete crunch_scripts, start clearing out API server
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/crunch_scripts/GATK2-VariantFiltration b/crunch_scripts/GATK2-VariantFiltration
deleted file mode 100755
index 0ef4a7473..000000000
--- a/crunch_scripts/GATK2-VariantFiltration
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-gatk_path = arvados.util.tarball_extract(
-    tarball = this_job['script_parameters']['gatk_binary_tarball'],
-    path = 'gatk')
-bundle_path = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    path = 'gatk-bundle',
-    files = ['human_g1k_v37.dict', 'human_g1k_v37.fasta', 'human_g1k_v37.fasta.fai'])
-this_task_input = this_task['parameters']['input']
-
-input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
-
-# choose vcf temporary file names
-vcf_in = os.path.join(arvados.current_task().tmpdir,
-                      os.path.basename(input_file.name()))
-vcf_out = re.sub('(.*)\\.vcf', '\\1-filtered.vcf', vcf_in)
-
-# fetch the unfiltered data
-vcf_in_file = open(vcf_in, 'w')
-for buf in input_file.readall():
-    vcf_in_file.write(buf)
-vcf_in_file.close()
-
-stdoutdata, stderrdata = arvados.util.run_command(
-    ['java', '-Xmx1g',
-     '-jar', os.path.join(gatk_path,'GenomeAnalysisTK.jar'),
-     '-T', 'VariantFiltration', '--variant', vcf_in,
-     '--out', vcf_out,
-     '--filterExpression', 'QD < 2.0',
-     '--filterName', 'GATK_QD',
-     '--filterExpression', 'MQ < 40.0',
-     '--filterName', 'GATK_MQ',
-     '--filterExpression', 'FS > 60.0',
-     '--filterName', 'GATK_FS',
-     '--filterExpression', 'MQRankSum < -12.5',
-     '--filterName', 'GATK_MQRankSum',
-     '--filterExpression', 'ReadPosRankSum < -8.0',
-     '--filterName', 'GATK_ReadPosRankSum',
-     '-R', os.path.join(bundle_path, 'human_g1k_v37.fasta')],
-    cwd=arvados.current_task().tmpdir)
-
-# store the filtered data
-with open(vcf_out, 'rb') as f:
-    out = arvados.CollectionWriter()
-    while True:
-        buf = f.read()
-        if len(buf) == 0:
-            break
-        out.write(buf)
-out.set_current_file_name(os.path.basename(vcf_out))
-
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/GATK2-bqsr b/crunch_scripts/GATK2-bqsr
deleted file mode 100755
index ab7822602..000000000
--- a/crunch_scripts/GATK2-bqsr
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import arvados
-import arvados_gatk2
-import arvados_samtools
-from arvados_ipc import *
-
-class InvalidArgumentError(Exception):
-    pass
-
-arvados_samtools.one_task_per_bam_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-tmpdir = arvados.current_task().tmpdir
-arvados.util.clear_tmpdir()
-
-known_sites_files = arvados.getjobparam(
-    'known_sites',
-    ['dbsnp_137.b37.vcf',
-     'Mills_and_1000G_gold_standard.indels.b37.vcf',
-     ])
-bundle_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    files = [
-        'human_g1k_v37.dict',
-        'human_g1k_v37.fasta',
-        'human_g1k_v37.fasta.fai'
-        ] + known_sites_files + [v + '.idx' for v in known_sites_files],
-    path = 'gatk_bundle')
-ref_fasta_files = [os.path.join(bundle_dir, f)
-                   for f in os.listdir(bundle_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-
-input_collection = this_task['parameters']['input']
-input_dir = arvados.util.collection_extract(
-    collection = input_collection,
-    path = os.path.join(this_task.tmpdir, 'input'))
-input_bam_files = []
-for f in arvados.util.listdir_recursive(input_dir):
-    if re.search(r'\.bam$', f):
-        input_stream_name, input_file_name = os.path.split(f)
-        input_bam_files += [os.path.join(input_dir, f)]
-if len(input_bam_files) != 1:
-    raise InvalidArgumentError("Expected exactly one bam file per task.")
-
-known_sites_args = []
-for f in known_sites_files:
-    known_sites_args += ['-knownSites', os.path.join(bundle_dir, f)]
-
-recal_file = os.path.join(tmpdir, 'recal.csv')
-
-children = {}
-pipes = {}
-
-arvados_gatk2.run(
-    args=[
-        '-nct', arvados_gatk2.cpus_on_this_node(),
-        '-T', 'BaseRecalibrator',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', recal_file,
-        ] + known_sites_args)
-
-pipe_setup(pipes, 'BQSR')
-if 0 == named_fork(children, 'BQSR'):
-    pipe_closeallbut(pipes, ('BQSR', 'w'))
-    arvados_gatk2.run(
-        args=[
-        '-T', 'PrintReads',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', '/dev/fd/' + str(pipes['BQSR','w']),
-        '-BQSR', recal_file,
-        '--disable_bam_indexing',
-        ],
-        close_fds=False)
-    os._exit(0)
-os.close(pipes.pop(('BQSR','w'), None))
-
-out = arvados.CollectionWriter()
-out.start_new_stream(input_stream_name)
-
-out.start_new_file(input_file_name + '.recal.csv')
-out.write(open(recal_file, 'rb'))
-
-out.start_new_file(input_file_name)
-while True:
-    buf = os.read(pipes['BQSR','r'], 2**20)
-    if len(buf) == 0:
-        break
-    out.write(buf)
-pipe_closeallbut(pipes)
-
-if waitpid_and_check_children(children):
-    this_task.set_output(out.finish())
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/GATK2-merge-call b/crunch_scripts/GATK2-merge-call
deleted file mode 100755
index 6d175172e..000000000
--- a/crunch_scripts/GATK2-merge-call
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import string
-import threading
-import arvados
-import arvados_gatk2
-import arvados_picard
-from arvados_ipc import *
-
-class InvalidArgumentError(Exception):
-    pass
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-tmpdir = arvados.current_task().tmpdir
-arvados.util.clear_tmpdir()
-
-bundle_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    files = [
-        'human_g1k_v37.dict',
-        'human_g1k_v37.fasta',
-        'human_g1k_v37.fasta.fai',
-        'dbsnp_137.b37.vcf',
-        'dbsnp_137.b37.vcf.idx',
-        ],
-    path = 'gatk_bundle')
-ref_fasta_files = [os.path.join(bundle_dir, f)
-                   for f in os.listdir(bundle_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-regions_args = []
-if 'regions' in this_job['script_parameters']:
-    regions_dir = arvados.util.collection_extract(
-        collection = this_job['script_parameters']['regions'],
-        path = 'regions')
-    region_padding = int(this_job['script_parameters']['region_padding'])
-    for f in os.listdir(regions_dir):
-        if re.search(r'\.bed$', f):
-            regions_args += [
-                '--intervals', os.path.join(regions_dir, f),
-                '--interval_padding', str(region_padding)
-                ]
-
-
-# Start a child process for each input file, feeding data to picard.
-
-input_child_names = []
-children = {}
-pipes = {}
-
-input_collection = this_job['script_parameters']['input']
-input_index = 0
-for s in arvados.CollectionReader(input_collection).all_streams():
-    for f in s.all_files():
-        if not re.search(r'\.bam$', f.name()):
-            continue
-        input_index += 1
-        childname = 'input-' + str(input_index)
-        input_child_names += [childname]
-        pipe_setup(pipes, childname)
-        childpid = named_fork(children, childname)
-        if childpid == 0:
-            pipe_closeallbut(pipes, (childname, 'w'))
-            for s in f.readall():
-                os.write(pipes[childname, 'w'], s)
-            os.close(pipes[childname, 'w'])
-            os._exit(0)
-        sys.stderr.write("pid %d writing %s to fd %d->%d\n" %
-                         (childpid,
-                          s.name()+'/'+f.name(),
-                          pipes[childname, 'w'],
-                          pipes[childname, 'r']))
-        pipe_closeallbut(pipes, *[(childname, 'r')
-                                  for childname in input_child_names])
-
-
-# Merge-sort the input files to merge.bam
-
-arvados_picard.run(
-    'MergeSamFiles',
-    args=[
-        'I=/dev/fd/' + str(pipes[childname, 'r'])
-        for childname in input_child_names
-        ],
-    params={
-        'o': 'merge.bam',
-        'quiet': 'true',
-        'so': 'coordinate',
-        'use_threading': 'true',
-        'create_index': 'true',
-        'validation_stringency': 'LENIENT',
-        },
-    close_fds=False,
-    )
-pipe_closeallbut(pipes)
-
-
-# Run CoverageBySample on merge.bam
-
-pipe_setup(pipes, 'stats_log')
-pipe_setup(pipes, 'stats_out')
-if 0 == named_fork(children, 'GATK'):
-    pipe_closeallbut(pipes,
-                     ('stats_log', 'w'),
-                     ('stats_out', 'w'))
-    arvados_gatk2.run(
-        args=[
-            '-T', 'CoverageBySample',
-            '-R', ref_fasta_files[0],
-            '-I', 'merge.bam',
-            '-o', '/dev/fd/' + str(pipes['stats_out', 'w']),
-            '--log_to_file', '/dev/fd/' + str(pipes['stats_log', 'w']),
-            ]
-        + regions_args,
-        close_fds=False)
-    pipe_closeallbut(pipes)
-    os._exit(0)
-pipe_closeallbut(pipes, ('stats_log', 'r'), ('stats_out', 'r'))
-
-
-# Start two threads to read from CoverageBySample pipes
-
-class ExceptionPropagatingThread(threading.Thread):
-    """
-    If a subclassed thread calls _raise(e) in run(), running join() on
-    the thread will raise e in the thread that calls join().
-    """
-    def __init__(self, *args, **kwargs):
-        super(ExceptionPropagatingThread, self).__init__(*args, **kwargs)
-        self.__exception = None
-    def join(self, *args, **kwargs):
-        ret = super(ExceptionPropagatingThread, self).join(*args, **kwargs)
-        if self.__exception:
-            raise self.__exception
-        return ret
-    def _raise(self, exception):
-        self.__exception = exception
-
-class StatsLogReader(ExceptionPropagatingThread):
-    def __init__(self, **kwargs):
-        super(StatsLogReader, self).__init__()
-        self.args = kwargs
-    def run(self):
-        try:
-            for logline in self.args['infile']:
-                x = re.search('Processing (\d+) bp from intervals', logline)
-                if x:
-                    self._total_bp = int(x.group(1))
-        except Exception as e:
-            self._raise(e)
-    def total_bp(self):
-        self.join()
-        return self._total_bp
-stats_log_thr = StatsLogReader(infile=os.fdopen(pipes.pop(('stats_log', 'r'))))
-stats_log_thr.start()
-
-class StatsOutReader(ExceptionPropagatingThread):
-    """
-    Read output of CoverageBySample and collect a histogram of
-    coverage (last column) -> number of loci (number of rows).
-    """
-    def __init__(self, **kwargs):
-        super(StatsOutReader, self).__init__()
-        self.args = kwargs
-    def run(self):
-        try:
-            hist = [0]
-            histtot = 0
-            for line in self.args['infile']:
-                try:
-                    i = int(string.split(line)[-1])
-                except ValueError:
-                    continue
-                if i >= 1:
-                    if len(hist) <= i:
-                        hist.extend([0 for x in range(1+i-len(hist))])
-                    hist[i] += 1
-                    histtot += 1
-            hist[0] = stats_log_thr.total_bp() - histtot
-            self._histogram = hist
-        except Exception as e:
-            self._raise(e)
-    def histogram(self):
-        self.join()
-        return self._histogram
-stats_out_thr = StatsOutReader(infile=os.fdopen(pipes.pop(('stats_out', 'r'))))
-stats_out_thr.start()
-
-
-# Run UnifiedGenotyper on merge.bam
-
-arvados_gatk2.run(
-    args=[
-        '-nt', arvados_gatk2.cpus_on_this_node(),
-        '-T', 'UnifiedGenotyper',
-        '-R', ref_fasta_files[0],
-        '-I', 'merge.bam',
-        '-o', os.path.join(tmpdir, 'out.vcf'),
-        '--dbsnp', os.path.join(bundle_dir, 'dbsnp_137.b37.vcf'),
-        '-metrics', 'UniGenMetrics',
-        '-A', 'DepthOfCoverage',
-        '-A', 'AlleleBalance',
-        '-A', 'QualByDepth',
-        '-A', 'HaplotypeScore',
-        '-A', 'MappingQualityRankSumTest',
-        '-A', 'ReadPosRankSumTest',
-        '-A', 'FisherStrand',
-        '-glm', 'both',
-        ]
-    + regions_args
-    + arvados.getjobparam('GATK2_UnifiedGenotyper_args',[]))
-
-# Copy the output VCF file to Keep
-
-out = arvados.CollectionWriter()
-out.start_new_stream()
-out.start_new_file('out.vcf')
-out.write(open(os.path.join(tmpdir, 'out.vcf'), 'rb'))
-
-
-# Write statistics to Keep
-
-out.start_new_file('mincoverage_nlocus.csv')
-sofar = 0
-hist = stats_out_thr.histogram()
-total_bp = stats_log_thr.total_bp()
-for i in range(len(hist)):
-    out.write("%d,%d,%f\n" %
-              (i,
-               total_bp - sofar,
-               100.0 * (total_bp - sofar) / total_bp))
-    sofar += hist[i]
-
-if waitpid_and_check_children(children):
-    this_task.set_output(out.finish())
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/GATK2-realign b/crunch_scripts/GATK2-realign
deleted file mode 100755
index 2787dffd5..000000000
--- a/crunch_scripts/GATK2-realign
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import arvados
-import arvados_gatk2
-import arvados_picard
-import arvados_samtools
-from arvados_ipc import *
-
-class InvalidArgumentError(Exception):
-    pass
-
-arvados_samtools.one_task_per_bam_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-tmpdir = arvados.current_task().tmpdir
-arvados.util.clear_tmpdir()
-
-known_sites_files = arvados.getjobparam(
-    'known_sites',
-    ['dbsnp_137.b37.vcf',
-     'Mills_and_1000G_gold_standard.indels.b37.vcf',
-     ])
-bundle_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['gatk_bundle'],
-    files = [
-        'human_g1k_v37.dict',
-        'human_g1k_v37.fasta',
-        'human_g1k_v37.fasta.fai'
-        ] + known_sites_files + [v + '.idx' for v in known_sites_files],
-    path = 'gatk_bundle')
-ref_fasta_files = [os.path.join(bundle_dir, f)
-                   for f in os.listdir(bundle_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-regions_args = []
-if 'regions' in this_job['script_parameters']:
-    regions_dir = arvados.util.collection_extract(
-        collection = this_job['script_parameters']['regions'],
-        path = 'regions')
-    region_padding = int(this_job['script_parameters']['region_padding'])
-    for f in os.listdir(regions_dir):
-        if re.search(r'\.bed$', f):
-            regions_args += [
-                '--intervals', os.path.join(regions_dir, f),
-                '--interval_padding', str(region_padding)
-                ]
-
-input_collection = this_task['parameters']['input']
-input_dir = arvados.util.collection_extract(
-    collection = input_collection,
-    path = os.path.join(this_task.tmpdir, 'input'))
-input_bam_files = []
-for f in arvados.util.listdir_recursive(input_dir):
-    if re.search(r'\.bam$', f):
-        input_stream_name, input_file_name = os.path.split(f)
-        input_bam_files += [os.path.join(input_dir, f)]
-if len(input_bam_files) != 1:
-    raise InvalidArgumentError("Expected exactly one bam file per task.")
-
-known_sites_args = []
-for f in known_sites_files:
-    known_sites_args += ['-known', os.path.join(bundle_dir, f)]
-
-children = {}
-pipes = {}
-
-arvados_gatk2.run(
-    args=[
-        '-nt', arvados_gatk2.cpus_per_task(),
-        '-T', 'RealignerTargetCreator',
-        '-R', ref_fasta_files[0],
-        '-I', input_bam_files[0],
-        '-o', os.path.join(tmpdir, 'intervals.list')
-        ] + known_sites_args + regions_args)
-
-pipe_setup(pipes, 'IndelRealigner')
-if 0 == named_fork(children, 'IndelRealigner'):
-    pipe_closeallbut(pipes, ('IndelRealigner', 'w'))
-    arvados_gatk2.run(
-        args=[
-        '-T', 'IndelRealigner',
-        '-R', ref_fasta_files[0],
-        '-targetIntervals', os.path.join(tmpdir, 'intervals.list'),
-        '-I', input_bam_files[0],
-        '-o', '/dev/fd/' + str(pipes['IndelRealigner','w']),
-        '--disable_bam_indexing',
-        ] + known_sites_args + regions_args,
-        close_fds=False)
-    os._exit(0)
-os.close(pipes.pop(('IndelRealigner','w'), None))
-
-pipe_setup(pipes, 'bammanifest')
-pipe_setup(pipes, 'bam')
-if 0==named_fork(children, 'bammanifest'):
-    pipe_closeallbut(pipes,
-                     ('IndelRealigner', 'r'),
-                     ('bammanifest', 'w'),
-                     ('bam', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(input_file_name)
-    while True:
-        buf = os.read(pipes['IndelRealigner','r'], 2**20)
-        if len(buf) == 0:
-            break
-        os.write(pipes['bam','w'], buf)
-        out.write(buf)
-    os.write(pipes['bammanifest','w'], out.manifest_text())
-    os.close(pipes['bammanifest','w'])
-    os._exit(0)
-
-pipe_setup(pipes, 'index')
-if 0==named_fork(children, 'index'):
-    pipe_closeallbut(pipes, ('bam', 'r'), ('index', 'w'))
-    arvados_picard.run(
-        'BuildBamIndex',
-        params={
-            'i': '/dev/fd/' + str(pipes['bam','r']),
-            'o': '/dev/fd/' + str(pipes['index','w']),
-            'quiet': 'true',
-            'validation_stringency': 'LENIENT'
-            },
-        close_fds=False)
-    os._exit(0)
-
-pipe_setup(pipes, 'indexmanifest')
-if 0==named_fork(children, 'indexmanifest'):
-    pipe_closeallbut(pipes, ('index', 'r'), ('indexmanifest', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(re.sub('\.bam$', '.bai', input_file_name))
-    while True:
-        buf = os.read(pipes['index','r'], 2**20)
-        if len(buf) == 0:
-            break
-        out.write(buf)
-    os.write(pipes['indexmanifest','w'], out.manifest_text())
-    os.close(pipes['indexmanifest','w'])
-    os._exit(0)
-
-pipe_closeallbut(pipes, ('bammanifest', 'r'), ('indexmanifest', 'r'))
-outmanifest = ''
-for which in ['bammanifest', 'indexmanifest']:
-    with os.fdopen(pipes[which,'r'], 'rb', 2**20) as f:
-        while True:
-            buf = f.read()
-            if buf == '':
-                break
-            outmanifest += buf
-
-all_ok = True
-for (childname, pid) in children.items():
-    all_ok = all_ok and waitpid_and_check_exit(pid, childname)
-
-if all_ok:
-    this_task.set_output(outmanifest)
-else:
-    sys.exit(1)
diff --git a/crunch_scripts/arvados-bcbio-nextgen.py b/crunch_scripts/arvados-bcbio-nextgen.py
deleted file mode 100755
index b7e19ecdd..000000000
--- a/crunch_scripts/arvados-bcbio-nextgen.py
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import subprocess
-import crunchutil.subst as subst
-import shutil
-import os
-import sys
-import time
-
-if len(arvados.current_task()['parameters']) > 0:
-    p = arvados.current_task()['parameters']
-else:
-    p = arvados.current_job()['script_parameters']
-
-t = arvados.current_task().tmpdir
-
-os.unlink("/usr/local/share/bcbio-nextgen/galaxy")
-os.mkdir("/usr/local/share/bcbio-nextgen/galaxy")
-shutil.copy("/usr/local/share/bcbio-nextgen/config/bcbio_system.yaml", "/usr/local/share/bcbio-nextgen/galaxy")
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool_data_table_conf.xml", "w") as f:
-    f.write('''<tables>
-    <!-- Locations of indexes in the BWA mapper format -->
-    <table name="bwa_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bwa_index.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie2 mapper format -->
-    <table name="bowtie2_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie2_indices.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie2 mapper format for TopHat2 to use -->
-    <table name="tophat2_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie2_indices.loc" />
-    </table>
-    <!-- Location of SAMTools indexes and other files -->
-    <table name="sam_fa_indexes" comment_char="#">
-        <columns>index, value, path</columns>
-        <file path="tool-data/sam_fa_indices.loc" />
-    </table>
-    <!-- Location of Picard dict file and other files -->
-    <table name="picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/picard_index.loc" />
-    </table>
-    <!-- Location of Picard dict files valid for GATK -->
-    <table name="gatk_picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/gatk_sorted_picard_index.loc" />
-    </table>
-</tables>
-''')
-
-os.mkdir("/usr/local/share/bcbio-nextgen/galaxy/tool-data")
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bowtie2_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(dir $(bowtie2_indices))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/bwa_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(bwa_index))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/gatk_sorted_picard_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(gatk_sorted_picard_index))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/picard_index.loc", "w") as f:
-    f.write(subst.do_substitution(p, "GRCh37\tGRCh37\tHuman (GRCh37)\t$(file $(picard_index))\n"))
-
-with open("/usr/local/share/bcbio-nextgen/galaxy/tool-data/sam_fa_indices.loc", "w") as f:
-    f.write(subst.do_substitution(p, "index\tGRCh37\t$(file $(sam_fa_indices))\n"))
-
-with open("/tmp/crunch-job/freebayes-variant.yaml", "w") as f:
-    f.write('''
-# Template for whole genome Illumina variant calling with FreeBayes
-# This is a GATK-free pipeline without post-alignment BAM pre-processing
-# (recalibration and realignment)
----
-details:
-  - analysis: variant2
-    genome_build: GRCh37
-    # to do multi-sample variant calling, assign samples the same metadata / batch
-    # metadata:
-    #   batch: your-arbitrary-batch-name
-    algorithm:
-      aligner: bwa
-      mark_duplicates: true
-      recalibrate: false
-      realign: false
-      variantcaller: freebayes
-      platform: illumina
-      quality_format: Standard
-      # for targeted projects, set the region
-      # variant_regions: /path/to/your.bed
-''')
-
-os.unlink("/usr/local/share/bcbio-nextgen/gemini_data")
-os.symlink(arvados.get_job_param_mount("gemini_data"), "/usr/local/share/bcbio-nextgen/gemini_data")
-
-os.chdir(arvados.current_task().tmpdir)
-
-rcode = subprocess.call(["bcbio_nextgen.py", "--workflow", "template", "/tmp/crunch-job/freebayes-variant.yaml", "project1",
-                         subst.do_substitution(p, "$(file $(R1))"),
-                         subst.do_substitution(p, "$(file $(R2))")])
-
-os.chdir("project1/work")
-
-os.symlink("/usr/local/share/bcbio-nextgen/galaxy/tool-data", "tool-data")
-
-rcode = subprocess.call(["bcbio_nextgen.py", "../config/project1.yaml", "-n", os.environ['CRUNCH_NODE_SLOTS']])
-
-print("run-command: completed with exit code %i (%s)" % (rcode, "success" if rcode == 0 else "failed"))
-
-if rcode == 0:
-    os.chdir("../final")
-
-    print("arvados-bcbio-nextgen: the follow output files will be saved to keep:")
-
-    subprocess.call(["find", ".", "-type", "f", "-printf", "arvados-bcbio-nextgen: %12.12s %h/%f\\n"])
-
-    print("arvados-bcbio-nextgen: start writing output to keep")
-
-    done = False
-    api = arvados.api('v1')
-    while not done:
-        try:
-            out = arvados.CollectionWriter()
-            out.write_directory_tree(".", max_manifest_depth=0)
-            outuuid = out.finish()
-            api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                                 body={
-                                                     'output':outuuid,
-                                                     'success': (rcode == 0),
-                                                     'progress':1.0
-                                                 }).execute()
-            done = True
-        except Exception as e:
-            print("arvados-bcbio-nextgen: caught exception: {}".format(e))
-            time.sleep(5)
-
-sys.exit(rcode)
diff --git a/crunch_scripts/arvados_bwa.py b/crunch_scripts/arvados_bwa.py
deleted file mode 100644
index aefc1f064..000000000
--- a/crunch_scripts/arvados_bwa.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-bwa_install_path = None
-
-def install_path():
-    """
-    Extract the bwa source tree, build the bwa binary, and return the
-    path to the source tree.
-    """
-    global bwa_install_path
-    if bwa_install_path:
-        return bwa_install_path
-
-    bwa_install_path = arvados.util.tarball_extract(
-        tarball = arvados.current_job()['script_parameters']['bwa_tbz'],
-        path = 'bwa')
-
-    # build "bwa" binary
-    lockfile = open(os.path.split(bwa_install_path)[0] + '.bwa-make.lock',
-                    'w')
-    fcntl.flock(lockfile, fcntl.LOCK_EX)
-    arvados.util.run_command(['make', '-j16'], cwd=bwa_install_path)
-    lockfile.close()
-
-    return bwa_install_path
-
-def bwa_binary():
-    """
-    Return the path to the bwa executable.
-    """
-    return os.path.join(install_path(), 'bwa')
-
-def run(command, command_args, **kwargs):
-    """
-    Build and run the bwa binary.
-
-    command is the bwa module, e.g., "index" or "aln".
-
-    command_args is a list of additional command line arguments, e.g.,
-    ['-a', 'bwtsw', 'ref.fasta']
-
-    It is assumed that we are running in a Crunch job environment, and
-    the job's "bwa_tbz" parameter is a collection containing the bwa
-    source tree in a .tbz file.
-    """
-    execargs = [bwa_binary(),
-                command]
-    execargs += command_args
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    arvados.util.run_command(
-        execargs,
-        cwd=arvados.current_task().tmpdir,
-        stderr=sys.stderr,
-        stdin=kwargs.get('stdin', subprocess.PIPE),
-        stdout=kwargs.get('stdout', sys.stderr))
-
-def one_task_per_pair_input_file(if_sequence=0, and_end_task=True):
-    """
-    Queue one task for each pair of fastq files in this job's input
-    collection.
-
-    Each new task will have two parameters, named "input_1" and
-    "input_2", each being a manifest containing a single fastq file.
-
-    A matching pair of files in the input collection is assumed to
-    have names "x_1.y" and "x_2.y".
-
-    Files in the input collection that are not part of a matched pair
-    are silently ignored.
-
-    if_sequence and and_end_task arguments have the same significance
-    as in arvados.job_setup.one_task_per_input_file().
-    """
-    if if_sequence != arvados.current_task()['sequence']:
-        return
-    job_input = arvados.current_job()['script_parameters']['input']
-    cr = arvados.CollectionReader(job_input)
-    all_files = []
-    for s in cr.all_streams():
-        all_files += list(s.all_files())
-    for s in cr.all_streams():
-        for left_file in s.all_files():
-            left_name = left_file.name()
-            right_file = None
-            right_name = re.sub(r'(.*_)1\.', '\g<1>2.', left_name)
-            if right_name == left_name:
-                continue
-            for f2 in s.all_files():
-                if right_name == f2.name():
-                    right_file = f2
-            if right_file != None:
-                new_task_attrs = {
-                    'job_uuid': arvados.current_job()['uuid'],
-                    'created_by_job_task_uuid': arvados.current_task()['uuid'],
-                    'sequence': if_sequence + 1,
-                    'parameters': {
-                        'input_1':left_file.as_manifest(),
-                        'input_2':right_file.as_manifest()
-                        }
-                    }
-                arvados.api().job_tasks().create(body=new_task_attrs).execute()
-    if and_end_task:
-        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                   body={'success':True}
-                                   ).execute()
-        exit(0)
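
The pairing rule in one_task_per_pair_input_file() is driven entirely by the re.sub() call above: a file is treated as the left half of a pair when rewriting "_1." to "_2." yields a different name that also exists in the same stream; anything else is silently ignored. A minimal sketch of that rule on hypothetical filenames:

    import re

    def right_name_for(left_name):
        # same substitution used by one_task_per_pair_input_file() above
        return re.sub(r'(.*_)1\.', '\g<1>2.', left_name)

    right_name_for('sample_1.fastq.gz')  # -> 'sample_2.fastq.gz' (paired if that file exists)
    right_name_for('reads.fastq')        # -> 'reads.fastq' (unchanged, so the file is ignored)
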
diff --git a/crunch_scripts/arvados_gatk2.py b/crunch_scripts/arvados_gatk2.py
deleted file mode 100644
index fa00b44d8..000000000
--- a/crunch_scripts/arvados_gatk2.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-gatk2_install_path = None
-
-def install_path():
-    global gatk2_install_path
-    if gatk2_install_path:
-        return gatk2_install_path
-    gatk2_install_path = arvados.util.tarball_extract(
-        tarball = arvados.current_job()['script_parameters']['gatk_tbz'],
-        path = 'gatk2')
-    return gatk2_install_path
-
-def memory_limit():
-    taskspernode = int(os.environ.get('CRUNCH_NODE_SLOTS', '1'))
-    with open('/proc/meminfo', 'r') as f:
-        ram = int(re.search(r'MemTotal:\s*(\d+)', f.read()).group(1)) / 1024
-    if taskspernode > 1:
-        ram = ram / taskspernode
-    return max(ram-700, 500)
-
-def cpus_on_this_node():
-    with open('/proc/cpuinfo', 'r') as cpuinfo:
-        return max(int(os.environ.get('SLURM_CPUS_ON_NODE', 1)),
-                   len(re.findall(r'^processor\s*:\s*\d',
-                                  cpuinfo.read(),
-                                  re.MULTILINE)))
-
-def cpus_per_task():
-    return max(1, (cpus_on_this_node()
-                   / int(os.environ.get('CRUNCH_NODE_SLOTS', 1))))
-
-def run(**kwargs):
-    kwargs.setdefault('cwd', arvados.current_task().tmpdir)
-    kwargs.setdefault('stdout', sys.stderr)
-    execargs = ['java',
-                '-Xmx%dm' % memory_limit(),
-                '-Djava.io.tmpdir=' + arvados.current_task().tmpdir,
-                '-jar', os.path.join(install_path(), 'GenomeAnalysisTK.jar')]
-    execargs += [str(arg) for arg in kwargs.pop('args', [])]
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    return arvados.util.run_command(execargs, **kwargs)
-
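
To make the sizing above concrete (the node size and slot count below are assumed values, not anything taken from this repository): memory_limit() divides the node's RAM evenly across Crunch task slots and reserves 700 MiB of headroom, with a 500 MiB floor.

    # same arithmetic as memory_limit() above, with assumed inputs
    mem_total_kb = 16777216           # hypothetical /proc/meminfo MemTotal (16 GiB node)
    tasks_per_node = 4                # hypothetical CRUNCH_NODE_SLOTS
    ram_mb = mem_total_kb // 1024 // tasks_per_node   # 4096
    xmx_mb = max(ram_mb - 700, 500)                   # 3396 -> java -Xmx3396m
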
diff --git a/crunch_scripts/arvados_ipc.py b/crunch_scripts/arvados_ipc.py
deleted file mode 100644
index 97871627b..000000000
--- a/crunch_scripts/arvados_ipc.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import os
-import re
-import sys
-import subprocess
-
-def pipe_setup(pipes, name):
-    pipes[name,'r'], pipes[name,'w'] = os.pipe()
-
-def pipe_closeallbut(pipes, *keepus):
-    for n,m in pipes.keys():
-        if (n,m) not in keepus:
-            os.close(pipes.pop((n,m), None))
-
-def named_fork(children, name):
-    children[name] = os.fork()
-    return children[name]
-
-def waitpid_and_check_children(children):
-    """
-    Given a dict of childname->pid, wait for each child process to
-    finish, and report non-zero exit status on stderr. Return True if
-    all children exited 0.
-    """
-    all_ok = True
-    for (childname, pid) in children.items():
-        # all_ok must be on RHS here -- we need to call waitpid() on
-        # every child, even if all_ok is already False.
-        all_ok = waitpid_and_check_exit(pid, childname) and all_ok
-    return all_ok
-
-def waitpid_and_check_exit(pid, childname=''):
-    """
-    Wait for a child process to finish. If it exits non-zero, report
-    exit status on stderr (mentioning the given childname) and return
-    False. If it exits zero, return True.
-    """
-    _, childstatus = os.waitpid(pid, 0)
-    exitvalue = childstatus >> 8
-    signal = childstatus & 127
-    dumpedcore = childstatus & 128
-    if childstatus != 0:
-        sys.stderr.write("%s child %d failed: exit %d signal %d core %s\n"
-                         % (childname, pid, exitvalue, signal,
-                            ('y' if dumpedcore else 'n')))
-        return False
-    return True
-
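
The GATK2-* scripts above all use these helpers in the same way: set up a named pipe, fork a named child that keeps only its end of the pipe, stream data through it, then reap every child with waitpid_and_check_children(). A minimal, self-contained sketch of that pattern (Python 2 style, matching the scripts in this commit; the 'gen' child and its payload are purely illustrative):

    import os
    import sys
    from arvados_ipc import *

    children = {}
    pipes = {}

    pipe_setup(pipes, 'gen')
    if 0 == named_fork(children, 'gen'):
        # child: keep only the write end, emit some data, exit without cleanup
        pipe_closeallbut(pipes, ('gen', 'w'))
        os.write(pipes['gen', 'w'], 'hello from the child\n')
        os.close(pipes['gen', 'w'])
        os._exit(0)

    # parent: drop the write end so the read loop sees EOF once the child exits
    pipe_closeallbut(pipes, ('gen', 'r'))
    data = ''
    while True:
        buf = os.read(pipes['gen', 'r'], 2**20)
        if len(buf) == 0:
            break
        data += buf
    os.close(pipes['gen', 'r'])

    if not waitpid_and_check_children(children):
        sys.exit(1)
    sys.stderr.write(data)
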
diff --git a/crunch_scripts/arvados_picard.py b/crunch_scripts/arvados_picard.py
deleted file mode 100644
index 3d830dbca..000000000
--- a/crunch_scripts/arvados_picard.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-picard_install_path = None
-
-def install_path():
-    global picard_install_path
-    if picard_install_path:
-        return picard_install_path
-    zipball = arvados.current_job()['script_parameters']['picard_zip']
-    extracted = arvados.util.zipball_extract(
-        zipball = zipball,
-        path = 'picard')
-    for f in os.listdir(extracted):
-        if (re.search(r'^picard-tools-[\d\.]+$', f) and
-            os.path.exists(os.path.join(extracted, f, '.'))):
-            picard_install_path = os.path.join(extracted, f)
-            break
-    if not picard_install_path:
-        raise Exception("picard-tools-{version} directory not found in %s" %
-                        zipball)
-    return picard_install_path
-
-def run(module, **kwargs):
-    kwargs.setdefault('cwd', arvados.current_task().tmpdir)
-    execargs = ['java',
-                '-Xmx1500m',
-                '-Djava.io.tmpdir=' + arvados.current_task().tmpdir,
-                '-jar', os.path.join(install_path(), module + '.jar')]
-    execargs += [str(arg) for arg in kwargs.pop('args', [])]
-    for key, value in kwargs.pop('params', {}).items():
-        execargs += [key.upper() + '=' + str(value)]
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    return arvados.util.run_command(execargs, **kwargs)
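
The params dict accepted by run() is flattened into Picard-style KEY=value arguments via key.upper() + '=' + str(value), which is how the BuildBamIndex call in GATK2-realign above ends up with I=..., O=... and QUIET=true on the java command line. A tiny illustration (values are hypothetical):

    params = {'i': 'in.bam', 'quiet': 'true'}
    execargs = [k.upper() + '=' + str(v) for k, v in params.items()]
    # -> ['I=in.bam', 'QUIET=true'] (dict order), appended after any positional args
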
diff --git a/crunch_scripts/arvados_samtools.py b/crunch_scripts/arvados_samtools.py
deleted file mode 100644
index 09992f6f2..000000000
--- a/crunch_scripts/arvados_samtools.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-import fcntl
-import subprocess
-
-samtools_path = None
-
-def samtools_install_path():
-    """
-    Extract the samtools source tree, build the samtools binary, and
-    return the path to the source tree.
-    """
-    global samtools_path
-    if samtools_path:
-        return samtools_path
-    samtools_path = arvados.util.tarball_extract(
-        tarball = arvados.current_job()['script_parameters']['samtools_tgz'],
-        path = 'samtools')
-
-    # build "samtools" binary
-    lockfile = open(os.path.split(samtools_path)[0] + '.samtools-make.lock',
-                    'w')
-    fcntl.flock(lockfile, fcntl.LOCK_EX)
-    arvados.util.run_command(['make', '-j16'], cwd=samtools_path)
-    lockfile.close()
-
-    return samtools_path
-
-def samtools_binary():
-    """
-    Return the path to the samtools executable.
-    """
-    return os.path.join(samtools_install_path(), 'samtools')
-
-def run(command, command_args, **kwargs):
-    """
-    Build and run the samtools binary.
-
-    command is the samtools subcommand, e.g., "view" or "sort".
-
-    command_args is a list of additional command line arguments, e.g.,
-    ['-bt', 'ref_list.txt', '-o', 'aln.bam', 'aln.sam.gz']
-
-    It is assumed that we are running in a Crunch job environment, and
-    the job's "samtools_tgz" parameter is a collection containing the
-    samtools source tree in a .tgz file.
-    """
-    execargs = [samtools_binary(),
-                command]
-    execargs += command_args
-    sys.stderr.write("%s.run: exec %s\n" % (__name__, str(execargs)))
-    arvados.util.run_command(
-        execargs,
-        cwd=arvados.current_task().tmpdir,
-        stdin=kwargs.get('stdin', subprocess.PIPE),
-        stderr=kwargs.get('stderr', sys.stderr),
-        stdout=kwargs.get('stdout', sys.stderr))
-
-def one_task_per_bam_file(if_sequence=0, and_end_task=True):
-    """
-    Queue one task for each bam file in this job's input collection.
-
-    Each new task will have an "input" parameter: a manifest
-    containing one .bam file and (if available) the corresponding .bai
-    index file.
-
-    Files in the input collection that are not named *.bam or *.bai
-    (as well as *.bai files that do not match any .bam file present)
-    are silently ignored.
-
-    if_sequence and and_end_task arguments have the same significance
-    as in arvados.job_setup.one_task_per_input_file().
-    """
-    if if_sequence != arvados.current_task()['sequence']:
-        return
-    job_input = arvados.current_job()['script_parameters']['input']
-    cr = arvados.CollectionReader(job_input)
-    bam = {}
-    bai = {}
-    for s in cr.all_streams():
-        for f in s.all_files():
-            if re.search(r'\.bam$', f.name()):
-                bam[s.name(), f.name()] = f
-            elif re.search(r'\.bai$', f.name()):
-                bai[s.name(), f.name()] = f
-    for ((s_name, f_name), bam_f) in bam.items():
-        bai_f = bai.get((s_name, re.sub(r'bam$', 'bai', f_name)), None)
-        task_input = bam_f.as_manifest()
-        if bai_f:
-            task_input += bai_f.as_manifest()
-        new_task_attrs = {
-            'job_uuid': arvados.current_job()['uuid'],
-            'created_by_job_task_uuid': arvados.current_task()['uuid'],
-            'sequence': if_sequence + 1,
-            'parameters': {
-                'input': task_input
-                }
-            }
-        arvados.api().job_tasks().create(body=new_task_attrs).execute()
-    if and_end_task:
-        arvados.api().job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                         body={'success':True}
-                                         ).execute()
-        exit(0)
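
The index lookup in one_task_per_bam_file() pairs files by exact name within a stream: the candidate key is the bam filename with its trailing "bam" rewritten to "bai", so sample.bam pairs with sample.bai, while an index named sample.bam.bai never matches and is silently dropped. A quick check of that substitution (filenames are hypothetical):

    import re
    # lookup key built for a given bam name, as in one_task_per_bam_file() above
    re.sub(r'bam$', 'bai', 'sample.bam')   # -> 'sample.bai'
    # an index stored as 'sample.bam.bai' is keyed under that full name and never matches
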
diff --git a/crunch_scripts/bwa-aln b/crunch_scripts/bwa-aln
deleted file mode 100755
index e3d85a7c3..000000000
--- a/crunch_scripts/bwa-aln
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados_bwa
-import arvados_samtools
-import os
-import re
-import sys
-import subprocess
-
-arvados_bwa.one_task_per_pair_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference_index'],
-    path = 'reference',
-    decompress = False)
-
-ref_basename = None
-for f in os.listdir(ref_dir):
-    basename = re.sub(r'\.bwt$', '', f)
-    if basename != f:
-        ref_basename = os.path.join(ref_dir, basename)
-if ref_basename == None:
-    raise Exception("Could not find *.bwt in reference collection.")
-
-tmp_dir = arvados.current_task().tmpdir
-
-class Aligner:
-    def input_filename(self):
-        for s in arvados.CollectionReader(self.collection).all_streams():
-            for f in s.all_files():
-                return f.decompressed_name()
-    def generate_input(self):
-        for s in arvados.CollectionReader(self.collection).all_streams():
-            for f in s.all_files():
-                for s in f.readall_decompressed():
-                    yield s
-    def aln(self, input_param):
-        self.collection = this_task['parameters'][input_param]
-        reads_filename = os.path.join(tmp_dir, self.input_filename())
-        aln_filename = os.path.join(tmp_dir, self.input_filename() + '.sai')
-        reads_pipe_r, reads_pipe_w = os.pipe()
-        if os.fork() == 0:
-            os.close(reads_pipe_r)
-            reads_file = open(reads_filename, 'wb')
-            for s in self.generate_input():
-                if len(s) != os.write(reads_pipe_w, s):
-                    raise Exception("short write")
-                reads_file.write(s)
-            reads_file.close()
-            os.close(reads_pipe_w)
-            sys.exit(0)
-        os.close(reads_pipe_w)
-
-        aln_file = open(aln_filename, 'wb')
-        bwa_proc = subprocess.Popen(
-            [arvados_bwa.bwa_binary(),
-             'aln', '-t', '16',
-             ref_basename,
-             '-'],
-            stdin=os.fdopen(reads_pipe_r, 'rb', 2**20),
-            stdout=aln_file)
-        aln_file.close()
-        return reads_filename, aln_filename
-
-reads_1, alignments_1 = Aligner().aln('input_1')
-reads_2, alignments_2 = Aligner().aln('input_2')
-pid1, exit1 = os.wait()
-pid2, exit2 = os.wait()
-if exit1 != 0 or exit2 != 0:
-    raise Exception("bwa aln exited non-zero (0x%x, 0x%x)" % (exit1, exit2))
-
-# output alignments in sam format to pipe
-sam_pipe_r, sam_pipe_w = os.pipe()
-sam_pid = os.fork()
-if sam_pid != 0:
-    # parent
-    os.close(sam_pipe_w)
-else:
-    # child
-    os.close(sam_pipe_r)
-    arvados_bwa.run('sampe',
-                    [ref_basename,
-                     alignments_1, alignments_2,
-                     reads_1, reads_2],
-                    stdout=os.fdopen(sam_pipe_w, 'wb', 2**20))
-    sys.exit(0)
-
-# convert sam (sam_pipe_r) to bam (bam_pipe_w)
-bam_pipe_r, bam_pipe_w = os.pipe()
-bam_pid = os.fork()
-if bam_pid != 0:
-    # parent
-    os.close(bam_pipe_w)
-    os.close(sam_pipe_r)
-else:
-    # child
-    os.close(bam_pipe_r)
-    arvados_samtools.run('view',
-                         ['-S', '-b',
-                          '-'],
-                         stdin=os.fdopen(sam_pipe_r, 'rb', 2**20),
-                         stdout=os.fdopen(bam_pipe_w, 'wb', 2**20))
-    sys.exit(0)
-
-# copy bam (bam_pipe_r) to Keep
-out_bam_filename = os.path.split(reads_1)[-1] + '.bam'
-out = arvados.CollectionWriter()
-out.start_new_stream()
-out.start_new_file(out_bam_filename)
-out.write(os.fdopen(bam_pipe_r, 'rb', 2**20))
-
-# make sure everyone exited nicely
-pid3, exit3 = os.waitpid(sam_pid, 0)
-if exit3 != 0:
-    raise Exception("bwa sampe exited non-zero (0x%x)" % exit3)
-pid4, exit4 = os.waitpid(bam_pid, 0)
-if exit4 != 0:
-    raise Exception("samtools view exited non-zero (0x%x)" % exit4)
-
-# proclaim success
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/bwa-index b/crunch_scripts/bwa-index
deleted file mode 100755
index f5b7030c0..000000000
--- a/crunch_scripts/bwa-index
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados_bwa
-import os
-import re
-import sys
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'reference',
-    decompress = False)
-
-ref_fasta_files = (os.path.join(ref_dir, f)
-                   for f in os.listdir(ref_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f))
-
-# build reference index
-arvados_bwa.run('index',
-                ['-a', 'bwtsw'] + list(ref_fasta_files))
-
-# move output files to new empty directory
-out_dir = os.path.join(arvados.current_task().tmpdir, 'out')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-os.mkdir(out_dir)
-for f in os.listdir(ref_dir):
-    if re.search(r'\.(amb|ann|bwt|pac|rbwt|rpac|rsa|sa)$', f):
-        sys.stderr.write("bwa output: %s (%d)\n" %
-                         (f, os.stat(os.path.join(ref_dir, f)).st_size))
-        os.rename(os.path.join(ref_dir, f),
-                  os.path.join(out_dir, f))
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/collection-merge b/crunch_scripts/collection-merge
deleted file mode 100755
index f3aa5ce9c..000000000
--- a/crunch_scripts/collection-merge
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# collection-merge
-#
-# Merge two or more collections together.  Can also be used to extract specific
-# files from a collection to produce a new collection.
-#
-# input:
-# An array of collections or collection/file paths in script_parameter["input"]
-#
-# output:
-# A manifest with the collections merged.  Duplicate file names will
-# have their contents concatenated in the order that they appear in the input
-# array.
-
-import arvados
-import md5
-import crunchutil.subst as subst
-import subprocess
-import os
-import hashlib
-
-p = arvados.current_job()['script_parameters']
-
-merged = ""
-src = []
-for c in p["input"]:
-    c = subst.do_substitution(p, c)
-    i = c.find('/')
-    if i == -1:
-        src.append(c)
-        merged += arvados.CollectionReader(c).manifest_text()
-    else:
-        src.append(c[0:i])
-        cr = arvados.CollectionReader(c[0:i])
-        j = c.rfind('/')
-        stream = c[i+1:j]
-        if stream == "":
-            stream = "."
-        fn = c[(j+1):]
-        for s in cr.all_streams():
-            if s.name() == stream:
-                if fn in s.files():
-                    merged += s.files()[fn].as_manifest()
-
-arvados.current_task().set_output(merged)
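
The "input" parameter above accepts either bare collection locators (whole manifests are concatenated) or collection/stream/file paths (only the named file's manifest is extracted). A hypothetical parameter block showing both forms, with made-up locators:

    p = {'input': [
        'zzzzz-4zz18-xxxxxxxxxxxxxxx',                # whole collection: manifest copied as-is
        'zzzzz-4zz18-yyyyyyyyyyyyyyy/subdir/a.bam',   # single file: its stream/file manifest only
    ]}
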
diff --git a/crunch_scripts/crunchrunner b/crunch_scripts/crunchrunner
deleted file mode 100755
index 25d3ba524..000000000
--- a/crunch_scripts/crunchrunner
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-if test -n "$JOB_PARAMETER_CRUNCHRUNNER" ; then
-    exec $TASK_KEEPMOUNT/$JOB_PARAMETER_CRUNCHRUNNER
-else
-    exec /usr/local/bin/crunchrunner
-fi
diff --git a/crunch_scripts/crunchutil/__init__.py b/crunch_scripts/crunchutil/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/crunch_scripts/crunchutil/robust_put.py b/crunch_scripts/crunchutil/robust_put.py
deleted file mode 100644
index 27b0bf345..000000000
--- a/crunch_scripts/crunchutil/robust_put.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados.commands.put as put
-import os
-import logging
-import time
-
-def machine_progress(bytes_written, bytes_expected):
-    return "upload wrote {} total {}\n".format(
-        bytes_written, -1 if (bytes_expected is None) else bytes_expected)
-
-class Args(object):
-    def __init__(self, fn):
-        self.filename = None
-        self.paths = [fn]
-        self.max_manifest_depth = 0
-
-# Upload to Keep with error recovery.
-# Return a uuid or raise an exception if there are too many failures.
-def upload(source_dir, logger=None):
-    if logger is None:
-        logger = logging.getLogger("arvados")
-
-    source_dir = os.path.abspath(source_dir)
-    done = False
-    if 'TASK_WORK' in os.environ:
-        resume_cache = put.ResumeCache(os.path.join(arvados.current_task().tmpdir, "upload-output-checkpoint"))
-    else:
-        resume_cache = put.ResumeCache(put.ResumeCache.make_path(Args(source_dir)))
-    reporter = put.progress_writer(machine_progress)
-    bytes_expected = put.expected_bytes_for([source_dir])
-    backoff = 1
-    outuuid = None
-    while not done:
-        try:
-            out = put.ArvPutCollectionWriter.from_cache(resume_cache, reporter, bytes_expected)
-            out.do_queued_work()
-            out.write_directory_tree(source_dir, max_manifest_depth=0)
-            outuuid = out.finish()
-            done = True
-        except KeyboardInterrupt as e:
-            logger.critical("caught interrupt signal 2")
-            raise e
-        except Exception as e:
-            logger.exception("caught exception:")
-            backoff *= 2
-            if backoff > 256:
-                logger.critical("Too many upload failures, giving up")
-                raise e
-            else:
-                logger.warning("Sleeping for %s seconds before trying again" % backoff)
-                time.sleep(backoff)
-    return outuuid
diff --git a/crunch_scripts/crunchutil/subst.py b/crunch_scripts/crunchutil/subst.py
deleted file mode 100644
index 53def97f9..000000000
--- a/crunch_scripts/crunchutil/subst.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import glob
-import os
-import re
-import stat
-
-BACKSLASH_ESCAPE_RE = re.compile(r'\\(.)')
-
-class SubstitutionError(Exception):
-    pass
-
-def search(c):
-    DEFAULT = 0
-    DOLLAR = 1
-
-    i = 0
-    state = DEFAULT
-    start = None
-    depth = 0
-    while i < len(c):
-        if c[i] == '\\':
-            i += 1
-        elif state == DEFAULT:
-            if c[i] == '$':
-                state = DOLLAR
-                if depth == 0:
-                    start = i
-            elif c[i] == ')':
-                if depth == 1:
-                    return [start, i]
-                if depth > 0:
-                    depth -= 1
-        elif state == DOLLAR:
-            if c[i] == '(':
-                depth += 1
-            state = DEFAULT
-        i += 1
-    if depth != 0:
-        raise SubstitutionError("Substitution error, mismatched parentheses {}".format(c))
-    return None
-
-def sub_file(v):
-    path = os.path.join(os.environ['TASK_KEEPMOUNT'], v)
-    st = os.stat(path)
-    if st and stat.S_ISREG(st.st_mode):
-        return path
-    else:
-        raise SubstitutionError("$(file {}) is not accessible or is not a regular file".format(path))
-
-def sub_dir(v):
-    d = os.path.dirname(v)
-    if d == '':
-        d = v
-    path = os.path.join(os.environ['TASK_KEEPMOUNT'], d)
-    st = os.stat(path)
-    if st and stat.S_ISDIR(st.st_mode):
-        return path
-    else:
-        raise SubstitutionError("$(dir {}) is not accessible or is not a directory".format(path))
-
-def sub_basename(v):
-    return os.path.splitext(os.path.basename(v))[0]
-
-def sub_glob(v):
-    l = glob.glob(v)
-    if len(l) == 0:
-        raise SubstitutionError("$(glob {}) no match found".format(v))
-    else:
-        return l[0]
-
-default_subs = {"file ": sub_file,
-                "dir ": sub_dir,
-                "basename ": sub_basename,
-                "glob ": sub_glob}
-
-def do_substitution(p, c, subs=default_subs):
-    while True:
-        m = search(c)
-        if m is None:
-            return BACKSLASH_ESCAPE_RE.sub(r'\1', c)
-
-        v = do_substitution(p, c[m[0]+2 : m[1]])
-        var = True
-        for sub in subs:
-            if v.startswith(sub):
-                r = subs[sub](v[len(sub):])
-                var = False
-                break
-        if var:
-            if v in p:
-                r = p[v]
-            else:
-                raise SubstitutionError("Unknown variable or function '%s' while performing substitution on '%s'" % (v, c))
-            if r is None:
-                raise SubstitutionError("Substitution for '%s' is null while performing substitution on '%s'" % (v, c))
-            if not isinstance(r, basestring):
-                raise SubstitutionError("Substitution for '%s' must be a string while performing substitution on '%s'" % (v, c))
-
-        c = c[:m[0]] + r + c[m[1]+1:]
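
A short usage sketch of the rules above (parameter names and values are hypothetical): plain $(name) expressions are looked up in the parameter dict, $(file ...), $(dir ...), $(basename ...) and $(glob ...) dispatch to the helpers, and innermost expressions are resolved first.

    import crunchutil.subst as subst

    params = {'sample': 'wgs01'}    # hypothetical script_parameters
    subst.do_substitution(params, '$(basename $(sample).fastq).bam')
    # inner $(sample) -> 'wgs01', then $(basename wgs01.fastq) -> 'wgs01', giving 'wgs01.bam'
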
diff --git a/crunch_scripts/crunchutil/vwd.py b/crunch_scripts/crunchutil/vwd.py
deleted file mode 100644
index 3245da14b..000000000
--- a/crunch_scripts/crunchutil/vwd.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import stat
-import arvados.commands.run
-import logging
-
-# Implements "Virtual Working Directory"
-# Provides a way of emulating a shared writable directory in Keep based
-# on a "check out, edit, check in, merge" model.
-# At the moment, this only permits adding new files; applications
-# cannot modify or delete existing files.
-
-# Create a symlink tree rooted at target_dir mirroring arv-mounted
-# source_collection.  target_dir must be empty, and will be created if it
-# doesn't exist.
-def checkout(source_collection, target_dir, keepmount=None):
-    # create symlinks
-    if keepmount is None:
-        keepmount = os.environ['TASK_KEEPMOUNT']
-
-    if not os.path.exists(target_dir):
-        os.makedirs(target_dir)
-
-    l = os.listdir(target_dir)
-    if len(l) > 0:
-        raise Exception("target_dir must be empty before checkout, contains %s" % l)
-
-    stem = os.path.join(keepmount, source_collection)
-    for root, dirs, files in os.walk(os.path.join(keepmount, source_collection), topdown=True):
-        rel = root[len(stem)+1:]
-        for d in dirs:
-            os.mkdir(os.path.join(target_dir, rel, d))
-        for f in files:
-            os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
-
-def checkin(target_dir):
-    """Write files in `target_dir` to Keep.
-
-    Regular files or symlinks to files outside the keep mount are written to
-    Keep as normal files (Keep does not support symlinks).
-
-    Symlinks to files in the keep mount will result in files in the new
-    collection that reference existing Keep blocks; no data copying is necessary.
-
-    Returns a new Collection object, with data flushed but the collection record
-    not saved to the API.
-
-    """
-
-    outputcollection = arvados.collection.Collection(num_retries=5)
-
-    if target_dir[-1:] != '/':
-        target_dir += '/'
-
-    collections = {}
-
-    logger = logging.getLogger("arvados")
-
-    last_error = None
-    for root, dirs, files in os.walk(target_dir):
-        for f in files:
-            try:
-                s = os.lstat(os.path.join(root, f))
-
-                writeIt = False
-
-                if stat.S_ISREG(s.st_mode):
-                    writeIt = True
-                elif stat.S_ISLNK(s.st_mode):
-                    # 1. check if it is a link into a collection
-                    real = os.path.split(os.path.realpath(os.path.join(root, f)))
-                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
-                    if pdh is not None:
-                        # 2. load collection
-                        if pdh not in collections:
-                            # 2.1 make sure it is flushed (see #5787 note 11)
-                            fd = os.open(real[0], os.O_RDONLY)
-                            os.fsync(fd)
-                            os.close(fd)
-
-                            # 2.2 get collection from API server
-                            collections[pdh] = arvados.collection.CollectionReader(pdh,
-                                                                                   api_client=outputcollection._my_api(),
-                                                                                   keep_client=outputcollection._my_keep(),
-                                                                                   num_retries=5)
-                        # 3. copy arvfile to new collection
-                        outputcollection.copy(branch, os.path.join(root[len(target_dir):], f), source_collection=collections[pdh])
-                    else:
-                        writeIt = True
-
-                if writeIt:
-                    reldir = root[len(target_dir):]
-                    with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
-                        with open(os.path.join(root, f), "rb") as reader:
-                            dat = reader.read(64*1024)
-                            while dat:
-                                writer.write(dat)
-                                dat = reader.read(64*1024)
-            except (IOError, OSError) as e:
-                logger.error(e)
-                last_error = e
-
-    return (outputcollection, last_error)
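
The checkout/checkin pair above is meant to be used in a "check out, add files, check in" cycle inside a crunch task. Below is a hedged usage sketch, assuming a Keep FUSE mount at /keep; the locator and paths are placeholders, not taken from any deleted script.

import crunchutil.vwd as vwd

source_pdh = "0123456789abcdef0123456789abcdef+0"   # placeholder collection locator
workdir = "/tmp/vwd-example"                         # hypothetical; must not already contain files

# Symlink the existing collection contents into the working directory.
vwd.checkout(source_pdh, workdir, keepmount="/keep")

# Add a new file; existing files are read-only symlinks and must not be modified.
with open(workdir + "/new-result.txt", "w") as f:
    f.write("results\n")

# Write new files back to Keep; symlinked files are referenced without copying.
collection, err = vwd.checkin(workdir)
if err is None:
    print(collection.manifest_text())
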
diff --git a/crunch_scripts/cwl-runner b/crunch_scripts/cwl-runner
deleted file mode 100755
index 0c79844d5..000000000
--- a/crunch_scripts/cwl-runner
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Crunch script integration for running arvados-cwl-runner inside a crunch job.
-
-import arvados_cwl
-import sys
-
-try:
-    # Use the crunch script defined in the arvados_cwl package.  This helps
-    # prevent the crunch script from going out of sync with the rest of the
-    # arvados_cwl package.
-    import arvados_cwl.crunch_script
-    arvados_cwl.crunch_script.run()
-    sys.exit()
-except ImportError:
-    pass
-
-# When running against an older arvados-cwl-runner package without
-# arvados_cwl.crunch_script, fall back to the old code.
-
-
-# This gets the job record, transforms the script parameters into a valid CWL
-# input object, then executes the CWL runner to run the underlying workflow or
-# tool.  When the workflow completes, it records the output object in an
-# output collection for this runner job.
-
-import arvados
-import arvados.collection
-import arvados.util
-import cwltool.main
-import logging
-import os
-import json
-import argparse
-import re
-import functools
-
-from arvados.api import OrderedJsonModel
-from cwltool.process import shortname, adjustFileObjs, adjustDirObjs, getListing, normalizeFilesDirs
-from cwltool.load_tool import load_tool
-
-# Print package versions
-logging.info(cwltool.main.versionstring())
-
-api = arvados.api("v1")
-
-try:
-    job_order_object = arvados.current_job()['script_parameters']
-
-    pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')
-
-    def keeppath(v):
-        if pdh_path.match(v):
-            return "keep:%s" % v
-        else:
-            return v
-
-    def keeppathObj(v):
-        v["location"] = keeppath(v["location"])
-
-    job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])
-
-    for k,v in job_order_object.items():
-        if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
-            job_order_object[k] = {
-                "class": "File",
-                "location": "keep:%s" % v
-            }
-
-    adjustFileObjs(job_order_object, keeppathObj)
-    adjustDirObjs(job_order_object, keeppathObj)
-    normalizeFilesDirs(job_order_object)
-    adjustDirObjs(job_order_object, functools.partial(getListing, arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))
-
-    output_name = None
-    if "arv:output_name" in job_order_object:
-        output_name = job_order_object["arv:output_name"]
-        del job_order_object["arv:output_name"]
-
-    runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
-                                      output_name=output_name)
-
-    t = load_tool(job_order_object, runner.arv_make_tool)
-
-    args = argparse.Namespace()
-    args.project_uuid = arvados.current_job()["owner_uuid"]
-    args.enable_reuse = True
-    args.submit = False
-    args.debug = True
-    args.quiet = False
-    args.ignore_docker_for_reuse = False
-    args.basedir = os.getcwd()
-    args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
-    outputObj = runner.arv_executor(t, job_order_object, **vars(args))
-
-    if runner.final_output_collection:
-        outputCollection = runner.final_output_collection.portable_data_hash()
-    else:
-        outputCollection = None
-
-    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                         body={
-                                             'output': outputCollection,
-                                             'success': True,
-                                             'progress':1.0
-                                         }).execute()
-except Exception as e:
-    logging.exception("Unhandled exception")
-    api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                         body={
-                                             'output': None,
-                                             'success': False,
-                                             'progress':1.0
-                                         }).execute()
diff --git a/crunch_scripts/decompress-all.py b/crunch_scripts/decompress-all.py
deleted file mode 100755
index 100ea1223..000000000
--- a/crunch_scripts/decompress-all.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-#
-# decompress-all.py
-#
-# Decompress all compressed files in the collection using the "dtrx" tool and
-# produce a new collection with the contents.  Uncompressed files
-# are passed through.
-#
-# input:
-# A collection at script_parameters["input"]
-#
-# output:
-# A manifest of the uncompressed contents of the input collection.
-
-import arvados
-import re
-import subprocess
-import os
-import sys
-import crunchutil.robust_put as robust_put
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
-                                          input_as_path=True)
-
-task = arvados.current_task()
-
-input_file = task['parameters']['input']
-
-infile_parts = re.match(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)?(/[^/]+)$", input_file)
-
-outdir = os.path.join(task.tmpdir, "output")
-os.makedirs(outdir)
-os.chdir(outdir)
-
-if infile_parts is None:
-    print >>sys.stderr, "Failed to parse input filename '%s' as a Keep file\n" % input_file
-    sys.exit(1)
-
-cr = arvados.CollectionReader(infile_parts.group(1))
-streamname = infile_parts.group(3)[1:]
-filename = infile_parts.group(4)[1:]
-
-if streamname is not None:
-    subprocess.call(["mkdir", "-p", streamname])
-    os.chdir(streamname)
-else:
-    streamname = '.'
-
-m = re.match(r'.*\.(gz|Z|bz2|tgz|tbz|zip|rar|7z|cab|deb|rpm|cpio|gem)$', arvados.get_task_param_mount('input'), re.IGNORECASE)
-
-if m is not None:
-    rc = subprocess.call(["dtrx", "-r", "-n", "-q", arvados.get_task_param_mount('input')])
-    if rc == 0:
-        task.set_output(robust_put.upload(outdir))
-    else:
-        sys.exit(rc)
-else:
-    streamreader = filter(lambda s: s.name() == streamname, cr.all_streams())[0]
-    filereader = streamreader.files()[filename]
-    task.set_output(streamname + filereader.as_manifest()[1:])
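
The regular expression used above splits a Keep path of the form <locator>/<stream>/<file>. A quick illustration, using the (made-up) empty-collection locator:

import re

infile_re = re.compile(r"(^[a-f0-9]{32}\+\d+)(\+\S+)*(/.*)?(/[^/]+)$")
m = infile_re.match("d41d8cd98f00b204e9800998ecf8427e+0/subdir/reads.fastq.gz")
print(m.group(1))      # d41d8cd98f00b204e9800998ecf8427e+0  (collection locator)
print(m.group(3)[1:])  # subdir                              (stream name)
print(m.group(4)[1:])  # reads.fastq.gz                      (file name)
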
diff --git a/crunch_scripts/file-select b/crunch_scripts/file-select
deleted file mode 100755
index c4af05c82..000000000
--- a/crunch_scripts/file-select
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_job_input = this_job['script_parameters']['input']
-manifest_text = ""
-for f in arvados.CollectionReader(this_job_input).all_files():
-    if f.name() in this_job['script_parameters']['names']:
-        manifest_text += f.as_manifest()
-
-this_task.set_output(arvados.Keep.put(manifest_text))
diff --git a/crunch_scripts/grep b/crunch_scripts/grep
deleted file mode 100755
index a84c0f671..000000000
--- a/crunch_scripts/grep
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_task_input = this_task['parameters']['input']
-pattern = re.compile(this_job['script_parameters']['pattern'])
-
-input_file = list(arvados.CollectionReader(this_task_input).all_files())[0]
-out = arvados.CollectionWriter()
-out.set_current_file_name(input_file.decompressed_name())
-out.set_current_stream_name(input_file.stream_name())
-for line in input_file.readlines():
-    if pattern.search(line):
-        out.write(line)
-
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/hash b/crunch_scripts/hash
deleted file mode 100755
index 56eec7a5f..000000000
--- a/crunch_scripts/hash
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import hashlib
-import os
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-
-if 'algorithm' in this_job['script_parameters']:
-    alg = this_job['script_parameters']['algorithm']
-else:
-    alg = 'md5'
-digestor = hashlib.new(alg)
-
-input_file = arvados.get_task_param_mount('input')
-
-with open(input_file) as f:
-    while True:
-        buf = f.read(2**20)
-        if len(buf) == 0:
-            break
-        digestor.update(buf)
-
-hexdigest = digestor.hexdigest()
-
-file_name = '/'.join(this_task['parameters']['input'].split('/')[1:])
-
-out = arvados.CollectionWriter()
-out.set_current_file_name("md5sum.txt")
-out.write("%s %s\n" % (hexdigest, file_name))
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/pgp-survey-import b/crunch_scripts/pgp-survey-import
deleted file mode 100755
index f12e84b2d..000000000
--- a/crunch_scripts/pgp-survey-import
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import string
-import json
-import UserDict
-import sys
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-this_job_input = this_job['script_parameters']['input']
-
-out = arvados.CollectionWriter()
-out.set_current_file_name("arvados_objects.json")
-out.write("[\n")
-separator = ""
-
-traits = {}
-done_bytes = 0
-done_ratio = 0
-for input_file in arvados.CollectionReader(this_job_input).all_files():
-    for line_number, line in enumerate(input_file.readlines()):
-
-        done_bytes += len(line)
-        new_done_ratio = 1.0 * done_bytes / input_file.size()
-        if line_number == 2 or new_done_ratio - done_ratio > 0.05:
-            sys.stderr.write("progress: %d%% after %d lines\n" % (int(done_ratio * 100), line_number+1))
-            done_ratio = new_done_ratio
-
-        words = string.split(string.strip(line), "\t")
-        if line_number == 0:
-            headings = words
-            for t in arvados.api('v1').traits().list(
-                where={'name':words},
-                limit=1000
-                ).execute()['items']:
-                traits[t['name']] = t
-            for i, trait_name in enumerate(words[3:], start=3):
-                # find or create trait
-                if trait_name not in traits:
-                    traits_match = arvados.api('v1').traits().list(
-                        where={'name':trait_name}
-                        ).execute()['items']
-                    if len(traits_match) > 0:
-                        traits[trait_name] = traits_match[0]
-                    else:
-                        traits[trait_name] = arvados.api('v1').traits().create(
-                            trait={'name':trait_name}).execute()
-                out.write(separator)
-                out.write(json.dumps(traits[trait_name]))
-                separator = ",\n"
-        else:
-            huID_links_match = arvados.api('v1').links().list(
-                where={'link_class':'identifier','name':words[0]}
-                ).execute()['items']
-            if len(huID_links_match) > 0:
-                human_uuid = huID_links_match[0]['head_uuid']
-            else:
-                human = arvados.api('v1').humans().create(
-                    body={}
-                    ).execute()
-                huID_link = arvados.api('v1').links().create(
-                    body={
-                        'link_class':'identifier',
-                        'name':words[0],
-                        'head_kind':'arvados#human',
-                        'head_uuid':human['uuid']
-                        }
-                    ).execute()
-                human_uuid = human['uuid']
-            human_trait = {}
-            for t in arvados.api('v1').links().list(
-                limit=10000,
-                where={
-                    'tail_uuid':human_uuid,
-                    'tail_kind':'arvados#human',
-                    'head_kind':'arvados#trait',
-                    'link_class':'human_trait',
-                    'name':'pgp-survey-response'
-                    }
-                ).execute()['items']:
-                human_trait[t['head_uuid']] = t
-            for i, trait_value in enumerate(words[3:], start=3):
-                trait_uuid = traits[headings[i]]['uuid']
-                if trait_uuid in human_trait:
-                    trait_link = human_trait[trait_uuid]
-                    if trait_link['properties']['value'] != trait_value:
-                        # update database value to match survey response
-                        trait_link['properties']['value'] = trait_value
-                        arvados.api('v1').links().update(
-                            uuid=trait_link['uuid'],
-                            body={'properties':trait_link['properties']}
-                            ).execute()
-                    out.write(",\n")
-                    out.write(json.dumps(trait_link))
-                elif trait_value == '':
-                    # nothing in database, nothing in input
-                    pass
-                else:
-                    trait_link = {
-                        'tail_uuid':human_uuid,
-                        'tail_kind':'arvados#human',
-                        'head_uuid':traits[headings[i]]['uuid'],
-                        'head_kind':'arvados#trait',
-                        'link_class':'human_trait',
-                        'name':'pgp-survey-response',
-                        'properties': { 'value': trait_value }
-                        }
-                    arvados.api('v1').links().create(
-                        body=trait_link
-                        ).execute()
-                    out.write(",\n")
-                    out.write(json.dumps(trait_link))
-
-out.write("\n]\n")
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/pgp-survey-parse b/crunch_scripts/pgp-survey-parse
deleted file mode 100755
index ee852f1d2..000000000
--- a/crunch_scripts/pgp-survey-parse
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-parser_path = arvados.util.git_checkout(
-    url = this_job['script_parameters']['parser_url'],
-    version = this_job['script_parameters']['parser_version'],
-    path = 'parser')
-
-stdoutdata, stderrdata = arvados.util.run_command(
-    ["python", "demo.py"],
-    cwd=parser_path)
-
-out = arvados.CollectionWriter()
-out.write(stdoutdata)
-out.set_current_file_name('participant_traits.tsv')
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/picard-gatk2-prep b/crunch_scripts/picard-gatk2-prep
deleted file mode 100755
index 976060f01..000000000
--- a/crunch_scripts/picard-gatk2-prep
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import subprocess
-import arvados_picard
-from arvados_ipc import *
-
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference'],
-    path = 'reference',
-    decompress = True)
-ref_fasta_files = [os.path.join(ref_dir, f)
-                   for f in os.listdir(ref_dir)
-                   if re.search(r'\.fasta(\.gz)?$', f)]
-input_collection = this_task['parameters']['input']
-
-for s in arvados.CollectionReader(input_collection).all_streams():
-    for f in s.all_files():
-        input_stream_name = s.name()
-        input_file_name = f.name()
-        break
-
-# Unfortunately, picard FixMateInformation cannot read from a pipe. We
-# must copy the input to a temporary file before running picard.
-input_bam_path = os.path.join(this_task.tmpdir, input_file_name)
-with open(input_bam_path, 'wb') as bam:
-    for s in arvados.CollectionReader(input_collection).all_streams():
-        for f in s.all_files():
-            for s in f.readall():
-                bam.write(s)
-
-children = {}
-pipes = {}
-
-pipe_setup(pipes, 'fixmate')
-if 0==named_fork(children, 'fixmate'):
-    pipe_closeallbut(pipes, ('fixmate', 'w'))
-    arvados_picard.run(
-        'FixMateInformation',
-        params={
-            'i': input_bam_path,
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'so': 'coordinate',
-            'validation_stringency': 'LENIENT',
-            'compression_level': 0
-            },
-        stdout=os.fdopen(pipes['fixmate','w'], 'wb', 2**20))
-    os._exit(0)
-os.close(pipes.pop(('fixmate','w'), None))
-
-pipe_setup(pipes, 'sortsam')
-if 0==named_fork(children, 'sortsam'):
-    pipe_closeallbut(pipes, ('fixmate', 'r'), ('sortsam', 'w'))
-    arvados_picard.run(
-        'SortSam',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'so': 'coordinate',
-            'validation_stringency': 'LENIENT',
-            'compression_level': 0
-            },
-        stdin=os.fdopen(pipes['fixmate','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['sortsam','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'reordersam')
-if 0==named_fork(children, 'reordersam'):
-    pipe_closeallbut(pipes, ('sortsam', 'r'), ('reordersam', 'w'))
-    arvados_picard.run(
-        'ReorderSam',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'reference': ref_fasta_files[0],
-            'quiet': 'true',
-            'validation_stringency': 'LENIENT',
-            'compression_level': 0
-            },
-        stdin=os.fdopen(pipes['sortsam','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['reordersam','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'addrg')
-if 0==named_fork(children, 'addrg'):
-    pipe_closeallbut(pipes, ('reordersam', 'r'), ('addrg', 'w'))
-    arvados_picard.run(
-        'AddOrReplaceReadGroups',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'rglb': this_job['script_parameters'].get('rglb', 0),
-            'rgpl': this_job['script_parameters'].get('rgpl', 'illumina'),
-            'rgpu': this_job['script_parameters'].get('rgpu', 0),
-            'rgsm': this_job['script_parameters'].get('rgsm', 0),
-            'validation_stringency': 'LENIENT'
-            },
-        stdin=os.fdopen(pipes['reordersam','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['addrg','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'bammanifest')
-pipe_setup(pipes, 'bam')
-pipe_setup(pipes, 'casm_in')
-if 0==named_fork(children, 'bammanifest'):
-    pipe_closeallbut(pipes,
-                     ('addrg', 'r'),
-                     ('bammanifest', 'w'),
-                     ('bam', 'w'),
-                     ('casm_in', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(input_file_name)
-    while True:
-        buf = os.read(pipes['addrg','r'], 2**20)
-        if len(buf) == 0:
-            break
-        os.write(pipes['bam','w'], buf)
-        os.write(pipes['casm_in','w'], buf)
-        out.write(buf)
-    os.write(pipes['bammanifest','w'], out.manifest_text())
-    os.close(pipes['bammanifest','w'])
-    os._exit(0)
-
-pipe_setup(pipes, 'casm')
-if 0 == named_fork(children, 'casm'):
-    pipe_closeallbut(pipes, ('casm_in', 'r'), ('casm', 'w'))
-    arvados_picard.run(
-        'CollectAlignmentSummaryMetrics',
-        params={
-            'input': '/dev/fd/' + str(pipes['casm_in','r']),
-            'output': '/dev/fd/' + str(pipes['casm','w']),
-            'reference_sequence': ref_fasta_files[0],
-            'validation_stringency': 'LENIENT',
-            },
-        close_fds=False)
-    os._exit(0)
-
-pipe_setup(pipes, 'index')
-if 0==named_fork(children, 'index'):
-    pipe_closeallbut(pipes, ('bam', 'r'), ('index', 'w'))
-    arvados_picard.run(
-        'BuildBamIndex',
-        params={
-            'i': '/dev/stdin',
-            'o': '/dev/stdout',
-            'quiet': 'true',
-            'validation_stringency': 'LENIENT'
-            },
-        stdin=os.fdopen(pipes['bam','r'], 'rb', 2**20),
-        stdout=os.fdopen(pipes['index','w'], 'wb', 2**20))
-    os._exit(0)
-
-pipe_setup(pipes, 'indexmanifest')
-if 0==named_fork(children, 'indexmanifest'):
-    pipe_closeallbut(pipes, ('index', 'r'), ('indexmanifest', 'w'))
-    out = arvados.CollectionWriter()
-    out.start_new_stream(input_stream_name)
-    out.start_new_file(re.sub('\.bam$', '.bai', input_file_name))
-    while True:
-        buf = os.read(pipes['index','r'], 2**20)
-        if len(buf) == 0:
-            break
-        out.write(buf)
-    os.write(pipes['indexmanifest','w'], out.manifest_text())
-    os.close(pipes['indexmanifest','w'])
-    os._exit(0)
-
-pipe_closeallbut(pipes,
-                 ('bammanifest', 'r'),
-                 ('indexmanifest', 'r'),
-                 ('casm', 'r'))
-
-outmanifest = ''
-
-for which in ['bammanifest', 'indexmanifest']:
-    with os.fdopen(pipes[which,'r'], 'rb', 2**20) as f:
-        while True:
-            buf = f.read()
-            if buf == '':
-                break
-            outmanifest += buf
-
-casm_out = arvados.CollectionWriter()
-casm_out.start_new_stream(input_stream_name)
-casm_out.start_new_file(input_file_name + '.casm.tsv')
-casm_out.write(os.fdopen(pipes.pop(('casm','r'))))
-
-outmanifest += casm_out.manifest_text()
-
-all_ok = True
-for (childname, pid) in children.items():
-    all_ok = all_ok and waitpid_and_check_exit(pid, childname)
-
-if all_ok:
-    this_task.set_output(outmanifest)
-else:
-    sys.exit(1)
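
The script above fans one BAM stream out to several picard consumers through anonymous pipes and forked children. Below is a generic, self-contained sketch of that tee-style fan-out using plain subprocess; it does not use Arvados or the arvados_ipc helpers, and the commands are arbitrary examples.

import subprocess

producer = subprocess.Popen(["echo", "example data"], stdout=subprocess.PIPE)
consumer_a = subprocess.Popen(["md5sum"], stdin=subprocess.PIPE)
consumer_b = subprocess.Popen(["wc", "-c"], stdin=subprocess.PIPE)

# Copy the producer's output to every consumer, one buffer at a time.
while True:
    buf = producer.stdout.read(2**20)
    if not buf:
        break
    consumer_a.stdin.write(buf)
    consumer_b.stdin.write(buf)

# Close stdin so the consumers see EOF, then wait for everything to finish.
for p in (consumer_a, consumer_b):
    p.stdin.close()
for p in (producer, consumer_a, consumer_b):
    p.wait()
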
diff --git a/crunch_scripts/pyrtg.py b/crunch_scripts/pyrtg.py
deleted file mode 100644
index d733270f8..000000000
--- a/crunch_scripts/pyrtg.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import os
-import sys
-
-rtg_install_path = None
-
-def setup():
-    global rtg_install_path
-    if rtg_install_path:
-        return rtg_install_path
-    rtg_path = arvados.util.zipball_extract(
-        zipball = arvados.current_job()['script_parameters']['rtg_binary_zip'],
-        path = 'rtg')
-    rtg_license_path = arvados.util.collection_extract(
-        collection = arvados.current_job()['script_parameters']['rtg_license'],
-        path = 'license',
-        decompress = False)
-
-    # symlink to rtg-license.txt
-    license_txt_path = os.path.join(rtg_license_path, 'rtg-license.txt')
-    try:
-        os.symlink(license_txt_path, os.path.join(rtg_path,'rtg-license.txt'))
-    except OSError:
-        if not os.path.exists(os.path.join(rtg_path,'rtg-license.txt')):
-            os.symlink(license_txt_path, os.path.join(rtg_path,'rtg-license.txt'))
-
-    rtg_install_path = rtg_path
-    return rtg_path
-
-def run_rtg(command, output_dir, command_args, **kwargs):
-    global rtg_install_path
-    execargs = [os.path.join(rtg_install_path, 'rtg'),
-                command,
-                '-o', output_dir]
-    execargs += command_args
-    sys.stderr.write("run_rtg: exec %s\n" % str(execargs))
-    arvados.util.run_command(
-        execargs,
-        cwd=arvados.current_task().tmpdir,
-        stderr=sys.stderr,
-        stdout=sys.stderr)
-
-    # Exit status cannot be trusted in rtg 1.1.1.
-    assert_done(output_dir)
-
-    # Copy log files to stderr and delete them to avoid storing them
-    # in Keep with the output data.
-    for dirent in arvados.util.listdir_recursive(output_dir):
-        if is_log_file(dirent):
-            log_file = os.path.join(output_dir, dirent)
-            sys.stderr.write(' '.join(['==>', dirent, '<==\n']))
-            with open(log_file, 'rb') as f:
-                while True:
-                    buf = f.read(2**20)
-                    if len(buf) == 0:
-                        break
-                    sys.stderr.write(buf)
-            sys.stderr.write('\n') # in case log does not end in newline
-            os.unlink(log_file)
-
-def assert_done(output_dir):
-    # Sanity-check exit code.
-    done_file = os.path.join(output_dir, 'done')
-    if not os.path.exists(done_file):
-        raise Exception("rtg exited 0 but %s does not exist. abort.\n" % done_file)
-
-def is_log_file(filename):
-    return re.search(r'^(.*/)?(progress|done|\S+.log)$', filename)
-
-setup()
diff --git a/crunch_scripts/rtg-fasta2sdf b/crunch_scripts/rtg-fasta2sdf
deleted file mode 100755
index f1ef617f6..000000000
--- a/crunch_scripts/rtg-fasta2sdf
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-fasta_path = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'fasta',
-    decompress = False)
-fasta_files = filter(lambda f: f != '.locator', os.listdir(fasta_path))
-out_dir = os.path.join(arvados.current_task().tmpdir, 'ref-sdf')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-
-pyrtg.run_rtg('format', out_dir,
-              map(lambda f: os.path.join(fasta_path, f), fasta_files))
-
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/rtg-fastq2sdf b/crunch_scripts/rtg-fastq2sdf
deleted file mode 100755
index e42697fc4..000000000
--- a/crunch_scripts/rtg-fastq2sdf
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-fastq_path = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'fastq')
-fastq_files = filter(lambda f: f != '.locator', os.listdir(fastq_path))
-tmp_dir_base = os.path.join(arvados.current_task().tmpdir, 'tmp')
-out_dir = os.path.join(arvados.current_task().tmpdir, 'reads')
-
-arvados.util.run_command(['rm', '-rf', tmp_dir_base], stderr=sys.stderr)
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-os.mkdir(tmp_dir_base)
-
-# convert fastq to sdf
-tmp_dirs = []
-for leftarm in fastq_files:
-    if re.search(r'_1.f(ast)?q(.gz)?$', leftarm):
-        rightarm = re.sub(r'_1(.f(ast)?q(.gz)?)$', '_2\\1', leftarm)
-        if rightarm in fastq_files:
-            tmp_dirs += ['%s/%08d' % (tmp_dir_base, len(tmp_dirs))]
-            pyrtg.run_rtg('format', tmp_dirs[-1],
-                          ['-f', 'fastq',
-                           '-q', 'sanger',
-                           '-l', os.path.join(fastq_path, leftarm),
-                           '-r', os.path.join(fastq_path, rightarm)])
-
-# split sdf
-pyrtg.run_rtg('sdfsplit', out_dir,
-              ['-n', '1500000'] + tmp_dirs)
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=1)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/rtg-map b/crunch_scripts/rtg-map
deleted file mode 100755
index f740888b9..000000000
--- a/crunch_scripts/rtg-map
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-arvados.job_setup.one_task_per_input_stream(if_sequence=0, and_end_task=True)
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-in_dir = os.path.join(this_task.tmpdir, 'input')
-arvados.util.run_command(['rm', '-rf', in_dir], stderr=sys.stderr)
-in_dir = arvados.util.stream_extract(
-    stream = arvados.StreamReader(this_task['parameters']['input']),
-    path = in_dir,
-    decompress = False)
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference'],
-    path = 'reference',
-    decompress = False)
-
-out_dir = os.path.join(arvados.current_task().tmpdir, 'out')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-
-# map reads
-pyrtg.run_rtg('map', out_dir,
-              ['-i', in_dir,
-               '-t', ref_dir,
-               '-a', '2',
-               '-b', '1',
-               '--sam-rg', '@RG\\tID:NA\\tSM:NA\\tPL:ILLUMINA'])
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, this_task['parameters']['input'][0], 0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/rtg-snp b/crunch_scripts/rtg-snp
deleted file mode 100755
index 1d8a605b9..000000000
--- a/crunch_scripts/rtg-snp
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import os
-import re
-import sys
-import pyrtg
-
-this_job = arvados.current_job()
-this_task = arvados.current_task()
-ref_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['reference'],
-    path = 'reference',
-    decompress = False)
-input_dir = arvados.util.collection_extract(
-    collection = this_job['script_parameters']['input'],
-    path = 'input')
-bam_files = map(lambda f: os.path.join(input_dir, f),
-                filter(lambda f: re.search(r'^(.*/)?alignments.bam$', f),
-                       arvados.util.listdir_recursive(input_dir)))
-out_dir = os.path.join(arvados.current_task().tmpdir, 'out')
-arvados.util.run_command(['rm', '-rf', out_dir], stderr=sys.stderr)
-
-# call sequence variants
-pyrtg.run_rtg('snp', out_dir,
-              ['-t', ref_dir] + bam_files)
-
-# store output
-out = arvados.CollectionWriter()
-out.write_directory_tree(out_dir, max_manifest_depth=0)
-this_task.set_output(out.finish())
diff --git a/crunch_scripts/run-command b/crunch_scripts/run-command
deleted file mode 100755
index 3fd08bf28..000000000
--- a/crunch_scripts/run-command
+++ /dev/null
@@ -1,458 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import logging
-
-logger = logging.getLogger('run-command')
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(logging.Formatter("run-command: %(message)s"))
-logger.addHandler(log_handler)
-logger.setLevel(logging.INFO)
-
-import arvados
-import re
-import os
-import subprocess
-import sys
-import shutil
-import crunchutil.subst as subst
-import time
-import arvados.commands.put as put
-import signal
-import stat
-import copy
-import traceback
-import pprint
-import multiprocessing
-import crunchutil.robust_put as robust_put
-import crunchutil.vwd as vwd
-import argparse
-import json
-import tempfile
-import errno
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--dry-run', action='store_true')
-parser.add_argument('--script-parameters', type=str, default="{}")
-args = parser.parse_args()
-
-os.umask(0077)
-
-if not args.dry_run:
-    api = arvados.api('v1')
-    t = arvados.current_task().tmpdir
-    os.chdir(arvados.current_task().tmpdir)
-    os.mkdir("tmpdir")
-    os.mkdir("output")
-
-    os.chdir("output")
-
-    outdir = os.getcwd()
-
-    taskp = None
-    jobp = arvados.current_job()['script_parameters']
-    if len(arvados.current_task()['parameters']) > 0:
-        taskp = arvados.current_task()['parameters']
-else:
-    outdir = "/tmp"
-    jobp = json.loads(args.script_parameters)
-    os.environ['JOB_UUID'] = 'zzzzz-8i9sb-1234567890abcde'
-    os.environ['TASK_UUID'] = 'zzzzz-ot0gb-1234567890abcde'
-    os.environ['CRUNCH_SRC'] = '/tmp/crunch-src'
-    if 'TASK_KEEPMOUNT' not in os.environ:
-        os.environ['TASK_KEEPMOUNT'] = '/keep'
-
-def sub_tmpdir(v):
-    return os.path.join(arvados.current_task().tmpdir, 'tmpdir')
-
-def sub_outdir(v):
-    return outdir
-
-def sub_cores(v):
-     return str(multiprocessing.cpu_count())
-
-def sub_jobid(v):
-     return os.environ['JOB_UUID']
-
-def sub_taskid(v):
-     return os.environ['TASK_UUID']
-
-def sub_jobsrc(v):
-     return os.environ['CRUNCH_SRC']
-
-subst.default_subs["task.tmpdir"] = sub_tmpdir
-subst.default_subs["task.outdir"] = sub_outdir
-subst.default_subs["job.srcdir"] = sub_jobsrc
-subst.default_subs["node.cores"] = sub_cores
-subst.default_subs["job.uuid"] = sub_jobid
-subst.default_subs["task.uuid"] = sub_taskid
-
-class SigHandler(object):
-    def __init__(self):
-        self.sig = None
-
-    def send_signal(self, subprocesses, signum):
-        for sp in subprocesses:
-            sp.send_signal(signum)
-        self.sig = signum
-
-# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
-def flatten(l, ltypes=(list, tuple)):
-    ltype = type(l)
-    l = list(l)
-    i = 0
-    while i < len(l):
-        while isinstance(l[i], ltypes):
-            if not l[i]:
-                l.pop(i)
-                i -= 1
-                break
-            else:
-                l[i:i + 1] = l[i]
-        i += 1
-    return ltype(l)
-
-def add_to_group(gr, match):
-    m = match.groups()
-    if m not in gr:
-        gr[m] = []
-    gr[m].append(match.group(0))
-
-class EvaluationError(Exception):
-    pass
-
-# Return the name of the variable ('var') that will take on each value in
-# 'items' when performing an inner substitution.
-def var_items(p, c, key):
-    if key not in c:
-        raise EvaluationError("'%s' was expected in 'c' but is missing" % key)
-
-    if "var" in c:
-        if not isinstance(c["var"], basestring):
-            raise EvaluationError("Value of 'var' must be a string")
-        # Var specifies the variable name for inner parameter substitution
-        return (c["var"], get_items(p, c[key]))
-    else:
-        # The component function ('key') value is a list, so return the list
-        # directly with no parameter selected.
-        if isinstance(c[key], list):
-            return (None, get_items(p, c[key]))
-        elif isinstance(c[key], basestring):
-            # check if c[key] is a string that looks like a parameter
-            m = re.match("^\$\((.*)\)$", c[key])
-            if m and m.group(1) in p:
-                return (m.group(1), get_items(p, c[key]))
-            else:
-                # backwards compatible, foreach specifies bare parameter name to use
-                return (c[key], get_items(p, p[c[key]]))
-        else:
-            raise EvaluationError("Value of '%s' must be a string or list" % key)
-
-# "p" is the parameter scope, "c" is the item to be expanded.
-# If "c" is a dict, apply function expansion.
-# If "c" is a list, recursively expand each item and return a new list.
-# If "c" is a string, apply parameter substitution
-def expand_item(p, c):
-    if isinstance(c, dict):
-        if "foreach" in c and "command" in c:
-            # Expand a command template for each item in the specified user
-            # parameter
-            var, items = var_items(p, c, "foreach")
-            if var is None:
-                raise EvaluationError("Must specify 'var' in foreach")
-            r = []
-            for i in items:
-                params = copy.copy(p)
-                params[var] = i
-                r.append(expand_item(params, c["command"]))
-            return r
-        elif "list" in c and "index" in c and "command" in c:
-            # extract a single item from a list
-            var, items = var_items(p, c, "list")
-            if var is None:
-                raise EvaluationError("Must specify 'var' in list")
-            params = copy.copy(p)
-            params[var] = items[int(c["index"])]
-            return expand_item(params, c["command"])
-        elif "regex" in c:
-            pattern = re.compile(c["regex"])
-            if "filter" in c:
-                # filter list so that it only includes items that match a
-                # regular expression
-                _, items = var_items(p, c, "filter")
-                return [i for i in items if pattern.match(i)]
-            elif "group" in c:
-                # generate a list of lists, where items are grouped on common
-                # subexpression match
-                _, items = var_items(p, c, "group")
-                groups = {}
-                for i in items:
-                    match = pattern.match(i)
-                    if match:
-                        add_to_group(groups, match)
-                return [groups[k] for k in groups]
-            elif "extract" in c:
-                # generate a list of lists, where items are split by
-                # subexpression match
-                _, items = var_items(p, c, "extract")
-                r = []
-                for i in items:
-                    match = pattern.match(i)
-                    if match:
-                        r.append(list(match.groups()))
-                return r
-        elif "batch" in c and "size" in c:
-            # generate a list of lists, where items are split into a batch size
-            _, items = var_items(p, c, "batch")
-            sz = int(c["size"])
-            r = []
-            for j in xrange(0, len(items), sz):
-                r.append(items[j:j+sz])
-            return r
-        raise EvaluationError("Missing valid list context function")
-    elif isinstance(c, list):
-        return [expand_item(p, arg) for arg in c]
-    elif isinstance(c, basestring):
-        m = re.match("^\$\((.*)\)$", c)
-        if m and m.group(1) in p:
-            return expand_item(p, p[m.group(1)])
-        else:
-            return subst.do_substitution(p, c)
-    else:
-        raise EvaluationError("expand_item() unexpected parameter type %s" % type(c))
-
-# Evaluate in a list context
-# "p" is the parameter scope, "value" will be evaluated
-# if "value" is a list after expansion, return that
-# if "value" is a path to a directory, return a list consisting of each entry in the directory
-# if "value" is a path to a file, return a list consisting of each line of the file
-def get_items(p, value):
-    value = expand_item(p, value)
-    if isinstance(value, list):
-        return value
-    elif isinstance(value, basestring):
-        mode = os.stat(value).st_mode
-        prefix = value[len(os.environ['TASK_KEEPMOUNT'])+1:]
-        if mode is not None:
-            if stat.S_ISDIR(mode):
-                items = [os.path.join(value, l) for l in os.listdir(value)]
-            elif stat.S_ISREG(mode):
-                with open(value) as f:
-                    items = [line.rstrip("\r\n") for line in f]
-            return items
-    raise EvaluationError("get_items did not yield a list")
-
-stdoutname = None
-stdoutfile = None
-stdinname = None
-stdinfile = None
-
-# Construct the cross product of all values of each variable listed in fvars
-def recursive_foreach(params, fvars):
-    var = fvars[0]
-    fvars = fvars[1:]
-    items = get_items(params, params[var])
-    logger.info("parallelizing on %s with items %s" % (var, items))
-    if items is not None:
-        for i in items:
-            params = copy.copy(params)
-            params[var] = i
-            if len(fvars) > 0:
-                recursive_foreach(params, fvars)
-            else:
-                if not args.dry_run:
-                    arvados.api().job_tasks().create(body={
-                        'job_uuid': arvados.current_job()['uuid'],
-                        'created_by_job_task_uuid': arvados.current_task()['uuid'],
-                        'sequence': 1,
-                        'parameters': params
-                    }).execute()
-                else:
-                    if isinstance(params["command"][0], list):
-                        for c in params["command"]:
-                            logger.info(flatten(expand_item(params, c)))
-                    else:
-                        logger.info(flatten(expand_item(params, params["command"])))
-    else:
-        logger.error("parameter %s with value %s in task.foreach yielded no items" % (var, params[var]))
-        sys.exit(1)
-
-try:
-    if "task.foreach" in jobp:
-        if args.dry_run or arvados.current_task()['sequence'] == 0:
-            # This is the first task to start the other tasks and exit
-            fvars = jobp["task.foreach"]
-            if isinstance(fvars, basestring):
-                fvars = [fvars]
-            if not isinstance(fvars, list) or len(fvars) == 0:
-                logger.error("value of task.foreach must be a string or non-empty list")
-                sys.exit(1)
-            recursive_foreach(jobp, jobp["task.foreach"])
-            if not args.dry_run:
-                if "task.vwd" in jobp:
-                    # Set output of the first task to the base vwd collection so it
-                    # will be merged with output fragments from the other tasks by
-                    # crunch.
-                    arvados.current_task().set_output(subst.do_substitution(jobp, jobp["task.vwd"]))
-                else:
-                    arvados.current_task().set_output(None)
-            sys.exit(0)
-    else:
-        # This is the only task so taskp/jobp are the same
-        taskp = jobp
-except Exception as e:
-    logger.exception("caught exception")
-    logger.error("job parameters were:")
-    logger.error(pprint.pformat(jobp))
-    sys.exit(1)
-
-try:
-    if not args.dry_run:
-        if "task.vwd" in taskp:
-            # Populate output directory with symlinks to files in collection
-            vwd.checkout(subst.do_substitution(taskp, taskp["task.vwd"]), outdir)
-
-        if "task.cwd" in taskp:
-            os.chdir(subst.do_substitution(taskp, taskp["task.cwd"]))
-
-    cmd = []
-    if isinstance(taskp["command"][0], list):
-        for c in taskp["command"]:
-            cmd.append(flatten(expand_item(taskp, c)))
-    else:
-        cmd.append(flatten(expand_item(taskp, taskp["command"])))
-
-    if "task.stdin" in taskp:
-        stdinname = subst.do_substitution(taskp, taskp["task.stdin"])
-        if not args.dry_run:
-            stdinfile = open(stdinname, "rb")
-
-    if "task.stdout" in taskp:
-        stdoutname = subst.do_substitution(taskp, taskp["task.stdout"])
-        if not args.dry_run:
-            stdoutfile = open(stdoutname, "wb")
-
-    if "task.env" in taskp:
-        env = copy.copy(os.environ)
-        for k,v in taskp["task.env"].items():
-            env[k] = subst.do_substitution(taskp, v)
-    else:
-        env = None
-
-    logger.info("{}{}{}".format(' | '.join([' '.join(c) for c in cmd]), (" < " + stdinname) if stdinname is not None else "", (" > " + stdoutname) if stdoutname is not None else ""))
-
-    if args.dry_run:
-        sys.exit(0)
-except subst.SubstitutionError as e:
-    logger.error(str(e))
-    logger.error("task parameters were:")
-    logger.error(pprint.pformat(taskp))
-    sys.exit(1)
-except Exception as e:
-    logger.exception("caught exception")
-    logger.error("task parameters were:")
-    logger.error(pprint.pformat(taskp))
-    sys.exit(1)
-
-# rcode holds the return codes produced by each subprocess
-rcode = {}
-try:
-    subprocesses = []
-    close_streams = []
-    if stdinfile:
-        close_streams.append(stdinfile)
-    next_stdin = stdinfile
-
-    for i in xrange(len(cmd)):
-        if i == len(cmd)-1:
-            # this is the last command in the pipeline, so its stdout should go to stdoutfile
-            next_stdout = stdoutfile
-        else:
-            # this is an intermediate command in the pipeline, so its stdout should go to a pipe
-            next_stdout = subprocess.PIPE
-
-        sp = subprocess.Popen(cmd[i], shell=False, stdin=next_stdin, stdout=next_stdout, env=env)
-
-        # Need to close the FDs on our side so that subcommands will get SIGPIPE if the
-        # consuming process ends prematurely.
-        if sp.stdout:
-            close_streams.append(sp.stdout)
-
-        # Send this process's stdout to the next process's stdin
-        next_stdin = sp.stdout
-
-        subprocesses.append(sp)
-
-    # File descriptors have been handed off to the subprocesses, so close them here.
-    for s in close_streams:
-        s.close()
-
-    # Set up signal handling
-    sig = SigHandler()
-
-    # Forward terminate signals to the subprocesses.
-    signal.signal(signal.SIGINT, lambda signum, frame: sig.send_signal(subprocesses, signum))
-    signal.signal(signal.SIGTERM, lambda signum, frame: sig.send_signal(subprocesses, signum))
-    signal.signal(signal.SIGQUIT, lambda signum, frame: sig.send_signal(subprocesses, signum))
-
-    active = 1
-    pids = set([s.pid for s in subprocesses])
-    while len(pids) > 0:
-        try:
-            (pid, status) = os.wait()
-        except OSError as e:
-            if e.errno == errno.EINTR:
-                pass
-            else:
-                raise
-        else:
-            pids.discard(pid)
-            if not taskp.get("task.ignore_rcode"):
-                rcode[pid] = (status >> 8)
-            else:
-                rcode[pid] = 0
-
-    if sig.sig is not None:
-        logger.critical("terminating on signal %s" % sig.sig)
-        sys.exit(2)
-    else:
-        for i in xrange(len(cmd)):
-            r = rcode[subprocesses[i].pid]
-            logger.info("%s completed with exit code %i (%s)" % (cmd[i][0], r, "success" if r == 0 else "failed"))
-
-except Exception as e:
-    logger.exception("caught exception")
-
-# restore default signal handlers.
-signal.signal(signal.SIGINT, signal.SIG_DFL)
-signal.signal(signal.SIGTERM, signal.SIG_DFL)
-signal.signal(signal.SIGQUIT, signal.SIG_DFL)
-
-logger.info("the following output files will be saved to keep:")
-
-subprocess.call(["find", "-L", ".", "-type", "f", "-printf", "run-command: %12.12s %h/%f\\n"], stdout=sys.stderr, cwd=outdir)
-
-logger.info("start writing output to keep")
-
-if "task.vwd" in taskp and "task.foreach" in jobp:
-    for root, dirs, files in os.walk(outdir):
-        for f in files:
-            s = os.lstat(os.path.join(root, f))
-            if stat.S_ISLNK(s.st_mode):
-                os.unlink(os.path.join(root, f))
-
-(outcollection, checkin_error) = vwd.checkin(outdir)
-
-# Success if we ran any subprocess, and they all exited 0.
-success = rcode and all(status == 0 for status in rcode.itervalues()) and not checkin_error
-
-api.job_tasks().update(uuid=arvados.current_task()['uuid'],
-                                     body={
-                                         'output': outcollection.manifest_text(),
-                                         'success': success,
-                                         'progress':1.0
-                                     }).execute()
-
-sys.exit(0 if success else 1)
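
run-command's task.foreach handling above creates one job task per value of the named parameter, and each task then expands its own $(...) references. The following is a simplified sketch of that fan-out over an example parameter set; it makes no API calls and the values are made up.

import copy

jobp = {
    "command": ["echo", "$(sample)"],
    "sample": ["s1", "s2"],
    "task.foreach": "sample",
}

tasks = []
for value in jobp[jobp["task.foreach"]]:
    params = copy.copy(jobp)
    params[jobp["task.foreach"]] = value   # bind the loop variable for this task
    tasks.append(params)

# Two tasks result, with "sample" bound to "s1" and "s2" respectively; each
# task would then substitute "$(sample)" into its command before running it.
print(len(tasks))
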
diff --git a/crunch_scripts/split-fastq.py b/crunch_scripts/split-fastq.py
deleted file mode 100755
index 61c384fbf..000000000
--- a/crunch_scripts/split-fastq.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import re
-import hashlib
-import string
-import sys
-api = arvados.api('v1')
-
-piece = 0
-manifest_text = ""
-
-# Look for paired reads
-
-inp = arvados.CollectionReader(arvados.getjobparam('reads'))
-
-manifest_list = []
-
-def nextline(reader, start):
-    n = -1
-    while True:
-        r = reader.readfrom(start, 128)
-        if r == '':
-            break
-        n = string.find(r, "\n")
-        if n > -1:
-            break
-        else:
-            start += 128
-    return n
-
-prog = re.compile(r'(.*?)(_[12])?\.fastq(\.gz)?$')
-
-# Look for fastq files
-for s in inp.all_streams():
-    for f in s.all_files():
-        name_pieces = prog.match(f.name())
-        if name_pieces is not None:
-            if s.name() != ".":
-                # The downstream tool (run-command) only iterates over the top
-                # level of directories, so if there are fastq files in
-                # directories in the input, the choice is either to forget
-                # there are directories (which might lead to name conflicts) or
-                # just fail.
-                print >>sys.stderr, "fastq must be at the root of the collection"
-                sys.exit(1)
-
-            p = None
-            if name_pieces.group(2) is not None:
-                if name_pieces.group(2) == "_1":
-                    p = [{}, {}]
-                    p[0]["reader"] = s.files()[name_pieces.group(0)]
-                    p[1]["reader"] = s.files()[name_pieces.group(1) + "_2.fastq" + (name_pieces.group(3) if name_pieces.group(3) else '')]
-            else:
-                p = [{}]
-                p[0]["reader"] = s.files()[name_pieces.group(0)]
-
-            if p is not None:
-                for i in xrange(0, len(p)):
-                    m = p[i]["reader"].as_manifest().split()
-                    m[0] = "./_" + str(piece)
-                    manifest_list.append(m)
-                piece += 1
-
-manifest_text = "\n".join(" ".join(m) for m in manifest_list) + "\n"
-
-arvados.current_task().set_output(manifest_text)
diff --git a/crunch_scripts/test/task_output_dir b/crunch_scripts/test/task_output_dir
deleted file mode 100755
index 8b2c7ced4..000000000
--- a/crunch_scripts/test/task_output_dir
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import arvados.crunch
-import hashlib
-import os
-
-out = arvados.crunch.TaskOutputDir()
-
-string = open(__file__).read()
-with open(os.path.join(out.path, 'example.out'), 'w') as f:
-    f.write(string)
-with open(os.path.join(out.path, 'example.out.SHA1'), 'w') as f:
-    f.write(hashlib.sha1(string).hexdigest() + "\n")
-
-arvados.current_task().set_output(out.manifest_text())
diff --git a/services/api/app/controllers/arvados/v1/job_tasks_controller.rb b/services/api/app/controllers/arvados/v1/job_tasks_controller.rb
index 07bbc33ab..b960d2e9e 100644
--- a/services/api/app/controllers/arvados/v1/job_tasks_controller.rb
+++ b/services/api/app/controllers/arvados/v1/job_tasks_controller.rb
@@ -4,4 +4,9 @@
 
 class Arvados::V1::JobTasksController < ApplicationController
   accept_attribute_as_json :parameters, Hash
+
+  def create
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
+  end
 end
diff --git a/services/api/app/controllers/arvados/v1/jobs_controller.rb b/services/api/app/controllers/arvados/v1/jobs_controller.rb
index c3655272d..f6308c528 100644
--- a/services/api/app/controllers/arvados/v1/jobs_controller.rb
+++ b/services/api/app/controllers/arvados/v1/jobs_controller.rb
@@ -13,115 +13,28 @@ class Arvados::V1::JobsController < ApplicationController
   include DbCurrentTime
 
   def create
-    [:repository, :script, :script_version, :script_parameters].each do |r|
-      if !resource_attrs[r]
-        return send_error("#{r} attribute must be specified",
-                          status: :unprocessable_entity)
-      end
-    end
-
-    # We used to ask for the minimum_, exclude_, and no_reuse params
-    # in the job resource. Now we advertise them as flags that alter
-    # the behavior of the create action.
-    [:minimum_script_version, :exclude_script_versions].each do |attr|
-      if resource_attrs.has_key? attr
-        params[attr] = resource_attrs.delete attr
-      end
-    end
-    if resource_attrs.has_key? :no_reuse
-      params[:find_or_create] = !resource_attrs.delete(:no_reuse)
-    end
-
-    return super if !params[:find_or_create]
-    return if !load_filters_param
-
-    begin
-      @object = Job.find_reusable(resource_attrs, params, @filters, @read_users)
-    rescue ArgumentError => error
-      return send_error(error.message)
-    end
-
-    if @object
-      show
-    else
-      super
-    end
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def cancel
-    reload_object_before_update
-    @object.cancel cascade: params[:cascade]
-    show
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def lock
-    @object.lock current_user.uuid
-    show
-  end
-
-  class LogStreamer
-    Q_UPDATE_INTERVAL = 12
-    def initialize(job, opts={})
-      @job = job
-      @opts = opts
-    end
-    def each
-      if @job.finished_at
-        yield "#{@job.uuid} finished at #{@job.finished_at}\n"
-        return
-      end
-      while not @job.started_at
-        # send a summary (job queue + available nodes) to the client
-        # every few seconds while waiting for the job to start
-        current_time = db_current_time
-        last_ack_at ||= current_time - Q_UPDATE_INTERVAL - 1
-        if current_time - last_ack_at >= Q_UPDATE_INTERVAL
-          nodes_in_state = {idle: 0, alloc: 0}
-          ActiveRecord::Base.uncached do
-            Node.where('hostname is not ?', nil).collect do |n|
-              if n.info[:slurm_state]
-                nodes_in_state[n.info[:slurm_state]] ||= 0
-                nodes_in_state[n.info[:slurm_state]] += 1
-              end
-            end
-          end
-          job_queue = Job.queue.select(:uuid)
-          n_queued_before_me = 0
-          job_queue.each do |j|
-            break if j.uuid == @job.uuid
-            n_queued_before_me += 1
-          end
-          yield "#{db_current_time}" \
-            " job #{@job.uuid}" \
-            " queue_position #{n_queued_before_me}" \
-            " queue_size #{job_queue.count}" \
-            " nodes_idle #{nodes_in_state[:idle]}" \
-            " nodes_alloc #{nodes_in_state[:alloc]}\n"
-          last_ack_at = db_current_time
-        end
-        sleep 3
-        ActiveRecord::Base.uncached do
-          @job.reload
-        end
-      end
-    end
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def queue
-    params[:order] ||= ['priority desc', 'created_at']
-    load_limit_offset_order_params
-    load_where_param
-    @where.merge!({state: Job::Queued})
-    return if !load_filters_param
-    find_objects_for_index
-    index
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def queue_size
-    # Users may not be allowed to see all the jobs in the queue, so provide a
-    # method to get just the queue size in order to get a gist of how busy the
-    # cluster is.
-    render :json => {:queue_size => Job.queue.size}
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 
   def self._create_requires_parameters
diff --git a/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb b/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb
index baffda1c9..166f71049 100644
--- a/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb
+++ b/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb
@@ -7,9 +7,13 @@ class Arvados::V1::PipelineInstancesController < ApplicationController
   accept_attribute_as_json :properties, Hash
   accept_attribute_as_json :components_summary, Hash
 
+  def create
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
+  end
+
   def cancel
-    reload_object_before_update
-    @object.cancel cascade: params[:cascade]
-    show
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
   end
 end
diff --git a/services/api/app/controllers/arvados/v1/pipeline_templates_controller.rb b/services/api/app/controllers/arvados/v1/pipeline_templates_controller.rb
index a276948d5..4a5e724ee 100644
--- a/services/api/app/controllers/arvados/v1/pipeline_templates_controller.rb
+++ b/services/api/app/controllers/arvados/v1/pipeline_templates_controller.rb
@@ -4,4 +4,9 @@
 
 class Arvados::V1::PipelineTemplatesController < ApplicationController
   accept_attribute_as_json :components, Hash
+
+  def create
+    return send_error("Unsupported legacy jobs API",
+                      status: 400)
+  end
 end
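
The controller changes above all follow one pattern: every legacy jobs-API write action now returns HTTP 400 with the message "Unsupported legacy jobs API". A rough client-side sketch of detecting that refusal (cluster host, token, and error-handling details are placeholders, not the Arvados SDK's own behavior):

    require 'json'
    require 'net/http'
    require 'uri'

    # Sketch only: probe the legacy jobs endpoint and treat an HTTP 400
    # carrying "Unsupported legacy jobs API" as "this cluster has dropped
    # the jobs API". Host and token are placeholders.
    uri = URI('https://arvados.example/arvados/v1/jobs')
    req = Net::HTTP::Post.new(uri)
    req['Authorization'] = 'Bearer PLACEHOLDER_TOKEN'
    req.body = JSON.generate(job: {})

    res = Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
      http.request(req)
    end

    if res.code == '400' && res.body.include?('Unsupported legacy jobs API')
      warn 'Legacy jobs API is disabled on this cluster; submit containers instead.'
    else
      warn "Unexpected response: #{res.code}"
    end
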
diff --git a/services/api/lib/crunch_dispatch.rb b/services/api/lib/crunch_dispatch.rb
deleted file mode 100644
index 4e640186d..000000000
--- a/services/api/lib/crunch_dispatch.rb
+++ /dev/null
@@ -1,981 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'open3'
-require 'shellwords'
-
-class CrunchDispatch
-  extend DbCurrentTime
-  include ApplicationHelper
-  include Process
-
-  EXIT_TEMPFAIL = 75
-  EXIT_RETRY_UNLOCKED = 93
-  RETRY_UNLOCKED_LIMIT = 3
-
-  class LogTime < Time
-    def to_s
-      self.utc.strftime "%Y-%m-%d_%H:%M:%S"
-    end
-  end
-
-  def initialize
-    @crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip)
-    if @crunch_job_bin.empty?
-      raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path."
-    end
-
-    @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
-    @docker_run_args = ENV['CRUNCH_JOB_DOCKER_RUN_ARGS']
-    @cgroup_root = ENV['CRUNCH_CGROUP_ROOT']
-    @srun_sync_timeout = ENV['CRUNCH_SRUN_SYNC_TIMEOUT']
-
-    @arvados_internal = Rails.configuration.Containers.JobsAPI.GitInternalDir
-    if not File.exist? @arvados_internal
-      $stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
-      raise "No internal git repository available" unless ($? == 0)
-    end
-
-    @repo_root = Rails.configuration.Git.Repositories
-    @arvados_repo_path = Repository.where(name: "arvados").first.server_path
-    @authorizations = {}
-    @did_recently = {}
-    @fetched_commits = {}
-    @git_tags = {}
-    @node_state = {}
-    @pipe_auth_tokens = {}
-    @running = {}
-    @todo = []
-    @todo_job_retries = {}
-    @job_retry_counts = Hash.new(0)
-    @todo_pipelines = []
-  end
-
-  def sysuser
-    return act_as_system_user
-  end
-
-  def refresh_todo
-    if @runoptions[:jobs]
-      @todo = @todo_job_retries.values + Job.queue.select(&:repository)
-    end
-    if @runoptions[:pipelines]
-      @todo_pipelines = PipelineInstance.queue
-    end
-  end
-
-  def each_slurm_line(cmd, outfmt, max_fields=nil)
-    max_fields ||= outfmt.split(":").size
-    max_fields += 1  # To accommodate the node field we add
-    @@slurm_version ||= Gem::Version.new(`sinfo --version`.match(/\b[\d\.]+\b/)[0])
-    if Gem::Version.new('2.3') <= @@slurm_version
-      `#{cmd} --noheader -o '%n:#{outfmt}'`.each_line do |line|
-        yield line.chomp.split(":", max_fields)
-      end
-    else
-      # Expand rows with hostname ranges (like "foo[1-3,5,9-12]:idle")
-      # into multiple rows with one hostname each.
-      `#{cmd} --noheader -o '%N:#{outfmt}'`.each_line do |line|
-        tokens = line.chomp.split(":", max_fields)
-        if (re = tokens[0].match(/^(.*?)\[([-,\d]+)\]$/))
-          tokens.shift
-          re[2].split(",").each do |range|
-            range = range.split("-").collect(&:to_i)
-            (range[0]..range[-1]).each do |n|
-              yield [re[1] + n.to_s] + tokens
-            end
-          end
-        else
-          yield tokens
-        end
-      end
-    end
-  end
-
-  def slurm_status
-    slurm_nodes = {}
-    each_slurm_line("sinfo", "%t") do |hostname, state|
-      # Treat nodes in idle* state as down, because the * means that slurm
-      # hasn't been able to communicate with it recently.
-      state.sub!(/^idle\*/, "down")
-      state.sub!(/\W+$/, "")
-      state = "down" unless %w(idle alloc comp mix drng down).include?(state)
-      slurm_nodes[hostname] = {state: state, job: nil}
-    end
-    each_slurm_line("squeue", "%j") do |hostname, job_uuid|
-      slurm_nodes[hostname][:job] = job_uuid if slurm_nodes[hostname]
-    end
-    slurm_nodes
-  end
-
-  def update_node_status
-    return unless Rails.configuration.Containers.JobsAPI.CrunchJobWrapper.to_s.match(/^slurm/)
-    slurm_status.each_pair do |hostname, slurmdata|
-      next if @node_state[hostname] == slurmdata
-      begin
-        node = Node.where('hostname=?', hostname).order(:last_ping_at).last
-        if node
-          $stderr.puts "dispatch: update #{hostname} state to #{slurmdata}"
-          node.info["slurm_state"] = slurmdata[:state]
-          node.job_uuid = slurmdata[:job]
-          if node.save
-            @node_state[hostname] = slurmdata
-          else
-            $stderr.puts "dispatch: failed to update #{node.uuid}: #{node.errors.messages}"
-          end
-        elsif slurmdata[:state] != 'down'
-          $stderr.puts "dispatch: SLURM reports '#{hostname}' is not down, but no node has that name"
-        end
-      rescue => error
-        $stderr.puts "dispatch: error updating #{hostname} node status: #{error}"
-      end
-    end
-  end
-
-  def positive_int(raw_value, default=nil)
-    value = begin raw_value.to_i rescue 0 end
-    if value > 0
-      value
-    else
-      default
-    end
-  end
-
-  NODE_CONSTRAINT_MAP = {
-    # Map Job runtime_constraints keys to the corresponding Node info key.
-    'min_ram_mb_per_node' => 'total_ram_mb',
-    'min_scratch_mb_per_node' => 'total_scratch_mb',
-    'min_cores_per_node' => 'total_cpu_cores',
-  }
-
-  def nodes_available_for_job_now(job)
-    # Find Nodes that satisfy a Job's runtime constraints (by building
-    # a list of Procs and using them to test each Node).  If there
-    # enough to run the Job, return an array of their names.
-    # Otherwise, return nil.
-    need_procs = NODE_CONSTRAINT_MAP.each_pair.map do |job_key, node_key|
-      Proc.new do |node|
-        positive_int(node.properties[node_key], 0) >=
-          positive_int(job.runtime_constraints[job_key], 0)
-      end
-    end
-    min_node_count = positive_int(job.runtime_constraints['min_nodes'], 1)
-    usable_nodes = []
-    Node.all.select do |node|
-      node.info['slurm_state'] == 'idle'
-    end.sort_by do |node|
-      # Prefer nodes with no price, then cheap nodes, then expensive nodes
-      node.properties['cloud_node']['price'].to_f rescue 0
-    end.each do |node|
-      if need_procs.select { |need_proc| not need_proc.call(node) }.any?
-        # At least one runtime constraint is not satisfied by this node
-        next
-      end
-      usable_nodes << node
-      if usable_nodes.count >= min_node_count
-        hostnames = usable_nodes.map(&:hostname)
-        log_nodes = usable_nodes.map do |n|
-          "#{n.hostname} #{n.uuid} #{n.properties.to_json}"
-        end
-        log_job = "#{job.uuid} #{job.runtime_constraints}"
-        log_text = "dispatching job #{log_job} to #{log_nodes.join(", ")}"
-        $stderr.puts log_text
-        begin
-          act_as_system_user do
-            Log.new(object_uuid: job.uuid,
-                    event_type: 'dispatch',
-                    owner_uuid: system_user_uuid,
-                    summary: "dispatching to #{hostnames.join(", ")}",
-                    properties: {'text' => log_text}).save!
-          end
-        rescue => e
-          $stderr.puts "dispatch: log.create failed: #{e}"
-        end
-        return hostnames
-      end
-    end
-    nil
-  end
-
-  def nodes_available_for_job(job)
-    # Check if there are enough idle nodes with the Job's minimum
-    # hardware requirements to run it.  If so, return an array of
-    # their names.  If not, up to once per hour, signal start_jobs to
-    # hold off launching Jobs.  This delay is meant to give the Node
-    # Manager an opportunity to make new resources available for new
-    # Jobs.
-    #
-    # The exact timing parameters here might need to be adjusted for
-    # the best balance between helping the longest-waiting Jobs run,
-    # and making efficient use of immediately available resources.
-    # These are all just first efforts until we have more data to work
-    # with.
-    nodelist = nodes_available_for_job_now(job)
-    if nodelist.nil? and not did_recently(:wait_for_available_nodes, 3600)
-      $stderr.puts "dispatch: waiting for nodes for #{job.uuid}"
-      @node_wait_deadline = Time.now + 5.minutes
-    end
-    nodelist
-  end
-
-  def fail_job job, message, skip_lock: false
-    $stderr.puts "dispatch: #{job.uuid}: #{message}"
-    begin
-      Log.new(object_uuid: job.uuid,
-              event_type: 'dispatch',
-              owner_uuid: job.owner_uuid,
-              summary: message,
-              properties: {"text" => message}).save!
-    rescue => e
-      $stderr.puts "dispatch: log.create failed: #{e}"
-    end
-
-    if not skip_lock and not have_job_lock?(job)
-      begin
-        job.lock @authorizations[job.uuid].user.uuid
-      rescue ArvadosModel::AlreadyLockedError
-        $stderr.puts "dispatch: tried to mark job #{job.uuid} as failed but it was already locked by someone else"
-        return
-      end
-    end
-
-    job.state = "Failed"
-    if not job.save
-      $stderr.puts "dispatch: save failed setting job #{job.uuid} to failed"
-    end
-  end
-
-  def stdout_s(cmd_a, opts={})
-    IO.popen(cmd_a, "r", opts) do |pipe|
-      return pipe.read.chomp
-    end
-  end
-
-  def git_cmd(*cmd_a)
-    ["git", "--git-dir=#{@arvados_internal}"] + cmd_a
-  end
-
-  def get_authorization(job)
-    if @authorizations[job.uuid] and
-        @authorizations[job.uuid].user.uuid != job.modified_by_user_uuid
-      # We already made a token for this job, but we need a new one
-      # because modified_by_user_uuid has changed (the job will run
-      # as a different user).
-      @authorizations[job.uuid].update_attributes expires_at: Time.now
-      @authorizations[job.uuid] = nil
-    end
-    if not @authorizations[job.uuid]
-      auth = ApiClientAuthorization.
-        new(user: User.where('uuid=?', job.modified_by_user_uuid).first,
-            api_client_id: 0)
-      if not auth.save
-        $stderr.puts "dispatch: auth.save failed for #{job.uuid}"
-      else
-        @authorizations[job.uuid] = auth
-      end
-    end
-    @authorizations[job.uuid]
-  end
-
-  def internal_repo_has_commit? sha1
-    if (not @fetched_commits[sha1] and
-        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
-        $? == 0)
-      @fetched_commits[sha1] = true
-    end
-    return @fetched_commits[sha1]
-  end
-
-  def get_commit src_repo, sha1
-    return true if internal_repo_has_commit? sha1
-
-    # commit does not exist in internal repository, so import the
-    # source repository using git fetch-pack
-    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
-    $stderr.puts "dispatch: #{cmd}"
-    $stderr.puts(stdout_s(cmd))
-    @fetched_commits[sha1] = ($? == 0)
-  end
-
-  def tag_commit(job, commit_hash, tag_name)
-    # @git_tags[T]==V if we know commit V has been tagged T in the
-    # arvados_internal repository.
-    if not @git_tags[tag_name]
-      cmd = git_cmd("tag", tag_name, commit_hash)
-      $stderr.puts "dispatch: #{cmd}"
-      $stderr.puts(stdout_s(cmd, err: "/dev/null"))
-      unless $? == 0
-        # git tag failed.  This may be because the tag already exists, so check for that.
-        tag_rev = stdout_s(git_cmd("rev-list", "-n1", tag_name))
-        if $? == 0
-          # We got a revision back
-          if tag_rev != commit_hash
-            # Uh oh, the tag doesn't point to the revision we were expecting.
-            # Someone has been monkeying with the job record and/or git.
-            fail_job job, "Existing tag #{tag_name} points to commit #{tag_rev} but expected commit #{commit_hash}"
-            return nil
-          end
-          # we're okay (fall through to setting @git_tags below)
-        else
-          # git rev-list failed for some reason.
-          fail_job job, "'git tag' for #{tag_name} failed but did not find any existing tag using 'git rev-list'"
-          return nil
-        end
-      end
-      # 'git tag' was successful, or there is an existing tag that points to the same revision.
-      @git_tags[tag_name] = commit_hash
-    elsif @git_tags[tag_name] != commit_hash
-      fail_job job, "Existing tag #{tag_name} points to commit #{@git_tags[tag_name]} but this job uses commit #{commit_hash}"
-      return nil
-    end
-    @git_tags[tag_name]
-  end
-
-  def start_jobs
-    @todo.each do |job|
-      next if @running[job.uuid]
-
-      cmd_args = nil
-      case Rails.configuration.Containers.JobsAPI.CrunchJobWrapper
-      when "none"
-        if @running.size > 0
-            # Don't run more than one at a time.
-            return
-        end
-        cmd_args = []
-      when "slurm_immediate"
-        nodelist = nodes_available_for_job(job)
-        if nodelist.nil?
-          if Time.now < @node_wait_deadline
-            break
-          else
-            next
-          end
-        end
-        cmd_args = ["salloc",
-                    "--chdir=/",
-                    "--immediate",
-                    "--exclusive",
-                    "--no-kill",
-                    "--job-name=#{job.uuid}",
-                    "--nodelist=#{nodelist.join(',')}"]
-      else
-        raise "Unknown crunch_job_wrapper: #{Rails.configuration.Containers.JobsAPI.CrunchJobWrapper}"
-      end
-
-      cmd_args = sudo_preface + cmd_args
-
-      next unless get_authorization job
-
-      ready = internal_repo_has_commit? job.script_version
-
-      if not ready
-        # Import the commit from the specified repository into the
-        # internal repository. This should have been done already when
-        # the job was created/updated; this code is obsolete except to
-        # avoid deployment races. Failing the job would be a
-        # reasonable thing to do at this point.
-        repo = Repository.where(name: job.repository).first
-        if repo.nil? or repo.server_path.nil?
-          fail_job job, "Repository #{job.repository} not found under #{@repo_root}"
-          next
-        end
-        ready &&= get_commit repo.server_path, job.script_version
-        ready &&= tag_commit job, job.script_version, job.uuid
-      end
-
-      # This should be unnecessary, because API server does it during
-      # job create/update, but it's still not a bad idea to verify the
-      # tag is correct before starting the job:
-      ready &&= tag_commit job, job.script_version, job.uuid
-
-      # The arvados_sdk_version doesn't support use of arbitrary
-      # remote URLs, so the requested version isn't necessarily copied
-      # into the internal repository yet.
-      if job.arvados_sdk_version
-        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
-        ready &&= tag_commit job, job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
-      end
-
-      if not ready
-        fail_job job, "commit not present in internal repository"
-        next
-      end
-
-      cmd_args += [@crunch_job_bin,
-                   '--job-api-token', @authorizations[job.uuid].api_token,
-                   '--job', job.uuid,
-                   '--git-dir', @arvados_internal]
-
-      if @cgroup_root
-        cmd_args += ['--cgroup-root', @cgroup_root]
-      end
-
-      if @docker_bin
-        cmd_args += ['--docker-bin', @docker_bin]
-      end
-
-      if @docker_run_args
-        cmd_args += ['--docker-run-args', @docker_run_args]
-      end
-
-      if @srun_sync_timeout
-        cmd_args += ['--srun-sync-timeout', @srun_sync_timeout]
-      end
-
-      if have_job_lock?(job)
-        cmd_args << "--force-unlock"
-      end
-
-      $stderr.puts "dispatch: #{cmd_args.join ' '}"
-
-      begin
-        i, o, e, t = Open3.popen3(*cmd_args)
-      rescue
-        $stderr.puts "dispatch: popen3: #{$!}"
-        # This is a dispatch problem like "Too many open files";
-        # retrying another job right away would be futile. Just return
-        # and hope things are better next time, after (at least) a
-        # did_recently() delay.
-        return
-      end
-
-      $stderr.puts "dispatch: job #{job.uuid}"
-      start_banner = "dispatch: child #{t.pid} start #{LogTime.now}"
-      $stderr.puts start_banner
-
-      @running[job.uuid] = {
-        stdin: i,
-        stdout: o,
-        stderr: e,
-        wait_thr: t,
-        job: job,
-        buf: {stderr: '', stdout: ''},
-        started: false,
-        sent_int: 0,
-        job_auth: @authorizations[job.uuid],
-        stderr_buf_to_flush: '',
-        stderr_flushed_at: Time.new(0),
-        bytes_logged: 0,
-        events_logged: 0,
-        log_throttle_is_open: true,
-        log_throttle_reset_time: Time.now + Rails.configuration.Containers.Logging.LogThrottlePeriod,
-        log_throttle_bytes_so_far: 0,
-        log_throttle_lines_so_far: 0,
-        log_throttle_bytes_skipped: 0,
-        log_throttle_partial_line_last_at: Time.new(0),
-        log_throttle_first_partial_line: true,
-      }
-      i.close
-      @todo_job_retries.delete(job.uuid)
-      update_node_status
-    end
-  end
-
-  # Test for hard cap on total output and for log throttling.  Returns whether
-  # the log line should go to output or not.  Modifies "line" in place to
-  # replace it with an error if a logging limit is tripped.
-  def rate_limit running_job, line
-    message = false
-    linesize = line.size
-    if running_job[:log_throttle_is_open]
-      partial_line = false
-      skip_counts = false
-      matches = line.match(/^\S+ \S+ \d+ \d+ stderr (.*)/)
-      if matches and matches[1] and matches[1].start_with?('[...]') and matches[1].end_with?('[...]')
-        partial_line = true
-        if Time.now > running_job[:log_throttle_partial_line_last_at] + Rails.configuration.Containers.Logging.LogPartialLineThrottlePeriod
-          running_job[:log_throttle_partial_line_last_at] = Time.now
-        else
-          skip_counts = true
-        end
-      end
-
-      if !skip_counts
-        running_job[:log_throttle_lines_so_far] += 1
-        running_job[:log_throttle_bytes_so_far] += linesize
-        running_job[:bytes_logged] += linesize
-      end
-
-      if (running_job[:bytes_logged] >
-          Rails.configuration.Containers.Logging.LimitLogBytesPerJob)
-        message = "Exceeded log limit #{Rails.configuration.Containers.Logging.LimitLogBytesPerJob} bytes (LimitLogBytesPerJob). Log will be truncated."
-        running_job[:log_throttle_reset_time] = Time.now + 100.years
-        running_job[:log_throttle_is_open] = false
-
-      elsif (running_job[:log_throttle_bytes_so_far] >
-             Rails.configuration.Containers.Logging.LogThrottleBytes)
-        remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.Containers.Logging.LogThrottleBytes} bytes per #{Rails.configuration.Containers.Logging.LogThrottlePeriod} seconds (LogThrottleBytes). Logging will be silenced for the next #{remaining_time.round} seconds."
-        running_job[:log_throttle_is_open] = false
-
-      elsif (running_job[:log_throttle_lines_so_far] >
-             Rails.configuration.Containers.Logging.LogThrottleLines)
-        remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.Containers.Logging.LogThrottleLines} lines per #{Rails.configuration.Containers.Logging.LogThrottlePeriod} seconds (LogThrottleLines), logging will be silenced for the next #{remaining_time.round} seconds."
-        running_job[:log_throttle_is_open] = false
-
-      elsif partial_line and running_job[:log_throttle_first_partial_line]
-        running_job[:log_throttle_first_partial_line] = false
-        message = "Rate-limiting partial segments of long lines to one every #{Rails.configuration.Containers.Logging.LogPartialLineThrottlePeriod} seconds."
-      end
-    end
-
-    if not running_job[:log_throttle_is_open]
-      # Don't log anything if any limit has been exceeded. Just count lossage.
-      running_job[:log_throttle_bytes_skipped] += linesize
-    end
-
-    if message
-      # Yes, write to logs, but use our "rate exceeded" message
-      # instead of the log message that exceeded the limit.
-      message += " A complete log is still being written to Keep, and will be available when the job finishes.\n"
-      line.replace message
-      true
-    elsif partial_line
-      false
-    else
-      running_job[:log_throttle_is_open]
-    end
-  end
-
-  def read_pipes
-    @running.each do |job_uuid, j|
-      now = Time.now
-      if now > j[:log_throttle_reset_time]
-        # It has been more than throttle_period seconds since the last
-        # checkpoint so reset the throttle
-        if j[:log_throttle_bytes_skipped] > 0
-          message = "#{job_uuid} ! Skipped #{j[:log_throttle_bytes_skipped]} bytes of log"
-          $stderr.puts message
-          j[:stderr_buf_to_flush] << "#{LogTime.now} #{message}\n"
-        end
-
-        j[:log_throttle_reset_time] = now + Rails.configuration.Containers.Logging.LogThrottlePeriod
-        j[:log_throttle_bytes_so_far] = 0
-        j[:log_throttle_lines_so_far] = 0
-        j[:log_throttle_bytes_skipped] = 0
-        j[:log_throttle_is_open] = true
-        j[:log_throttle_partial_line_last_at] = Time.new(0)
-        j[:log_throttle_first_partial_line] = true
-      end
-
-      j[:buf].each do |stream, streambuf|
-        # Read some data from the child stream
-        buf = ''
-        begin
-          # It's important to use a big enough buffer here. When we're
-          # being flooded with logs, we must read and discard many
-          # bytes at once. Otherwise, we can easily peg a CPU with
-          # time-checking and other loop overhead. (Quick tests show a
-          # 1MiB buffer working 2.5x as fast as a 64 KiB buffer.)
-          #
-          # So don't reduce this buffer size!
-          buf = j[stream].read_nonblock(2**20)
-        rescue Errno::EAGAIN, EOFError
-        end
-
-        # Short circuit the counting code if we're just going to throw
-        # away the data anyway.
-        if not j[:log_throttle_is_open]
-          j[:log_throttle_bytes_skipped] += streambuf.size + buf.size
-          streambuf.replace ''
-          next
-        elsif buf == ''
-          next
-        end
-
-        # Append to incomplete line from previous read, if any
-        streambuf << buf
-
-        bufend = ''
-        streambuf.each_line do |line|
-          if not line.end_with? $/
-            if line.size > Rails.configuration.Containers.Logging.LogThrottleBytes
-              # Without a limit here, we'll use 2x an arbitrary amount
-              # of memory, and waste a lot of time copying strings
-              # around, all without providing any feedback to anyone
-              # about what's going on _or_ hitting any of our throttle
-              # limits.
-              #
-              # Here we leave "line" alone, knowing it will never be
-              # sent anywhere: rate_limit() will reach
-              # crunch_log_throttle_bytes immediately. However, we'll
-              # leave [...] in bufend: if the trailing end of the long
-              # line does end up getting sent anywhere, it will have
-              # some indication that it is incomplete.
-              bufend = "[...]"
-            else
-              # If line length is sane, we'll wait for the rest of the
-              # line to appear in the next read_pipes() call.
-              bufend = line
-              break
-            end
-          end
-          # rate_limit returns true or false as to whether to actually log
-          # the line or not.  It also modifies "line" in place to replace
-          # it with an error if a logging limit is tripped.
-          if rate_limit j, line
-            $stderr.print "#{job_uuid} ! " unless line.index(job_uuid)
-            $stderr.puts line
-            pub_msg = "#{LogTime.now} #{line.strip}\n"
-            j[:stderr_buf_to_flush] << pub_msg
-          end
-        end
-
-        # Leave the trailing incomplete line (if any) in streambuf for
-        # next time.
-        streambuf.replace bufend
-      end
-      # Flush buffered logs to the logs table, if appropriate. We have
-      # to do this even if we didn't collect any new logs this time:
-      # otherwise, buffered data older than seconds_between_events
-      # won't get flushed until new data arrives.
-      write_log j
-    end
-  end
-
-  def reap_children
-    return if 0 == @running.size
-    pid_done = nil
-    j_done = nil
-
-    @running.each do |uuid, j|
-      if !j[:wait_thr].status
-        pid_done = j[:wait_thr].pid
-        j_done = j
-        break
-      end
-    end
-
-    return if !pid_done
-
-    job_done = j_done[:job]
-
-    # Ensure every last drop of stdout and stderr is consumed.
-    read_pipes
-    # Reset flush timestamp to make sure log gets written.
-    j_done[:stderr_flushed_at] = Time.new(0)
-    # Write any remaining logs.
-    write_log j_done
-
-    j_done[:buf].each do |stream, streambuf|
-      if streambuf != ''
-        $stderr.puts streambuf + "\n"
-      end
-    end
-
-    # Wait on the thread (returns a Process::Status)
-    exit_status = j_done[:wait_thr].value.exitstatus
-    exit_tempfail = exit_status == EXIT_TEMPFAIL
-
-    $stderr.puts "dispatch: child #{pid_done} exit #{exit_status}"
-    $stderr.puts "dispatch: job #{job_done.uuid} end"
-
-    jobrecord = Job.find_by_uuid(job_done.uuid)
-
-    if exit_status == EXIT_RETRY_UNLOCKED or (exit_tempfail and @job_retry_counts.include? jobrecord.uuid)
-      $stderr.puts("dispatch: job #{jobrecord.uuid} was interrupted by node failure")
-      # Only this crunch-dispatch process can retry the job:
-      # it's already locked, and there's no way to put it back in the
-      # Queued state.  Put it in our internal todo list unless the job
-      # has failed this way excessively.
-      @job_retry_counts[jobrecord.uuid] += 1
-      exit_tempfail = @job_retry_counts[jobrecord.uuid] <= RETRY_UNLOCKED_LIMIT
-      do_what_next = "give up now"
-      if exit_tempfail
-        @todo_job_retries[jobrecord.uuid] = jobrecord
-        do_what_next = "re-attempt"
-      end
-      $stderr.puts("dispatch: job #{jobrecord.uuid} has been interrupted " +
-                   "#{@job_retry_counts[jobrecord.uuid]}x, will #{do_what_next}")
-    end
-
-    if !exit_tempfail
-      @job_retry_counts.delete(jobrecord.uuid)
-      if jobrecord.state == "Running"
-        # Apparently there was an unhandled error.  That could potentially
-        # include "all allocated nodes failed" when we don't to retry
-        # because the job has already been retried RETRY_UNLOCKED_LIMIT
-        # times.  Fail the job.
-        jobrecord.state = "Failed"
-        if not jobrecord.save
-          $stderr.puts "dispatch: jobrecord.save failed"
-        end
-      end
-    else
-      # If the job failed to run due to an infrastructure
-      # issue with crunch-job or slurm, we want the job to stay in the
-      # queue. If crunch-job exited after losing a race to another
-      # crunch-job process, it exits 75 and we should leave the job
-      # record alone so the winner of the race can do its thing.
-      # If crunch-job exited after all of its allocated nodes failed,
-      # it exits 93, and we want to retry it later (see the
-      # EXIT_RETRY_UNLOCKED `if` block).
-      #
-      # There is still an unhandled race condition: If our crunch-job
-      # process is about to lose a race with another crunch-job
-      # process, but crashes before getting to its "exit 75" (for
-      # example, "cannot fork" or "cannot reach API server") then we
-      # will assume incorrectly that it's our process's fault that
-      # jobrecord.started_at is non-nil, and mark the job as failed
-      # even though the winner of the race is probably still doing
-      # fine.
-    end
-
-    # Invalidate the per-job auth token, unless the job is still queued and we
-    # might want to try it again.
-    if jobrecord.state != "Queued" and !@todo_job_retries.include?(jobrecord.uuid)
-      j_done[:job_auth].update_attributes expires_at: Time.now
-    end
-
-    @running.delete job_done.uuid
-  end
-
-  def update_pipelines
-    expire_tokens = @pipe_auth_tokens.dup
-    @todo_pipelines.each do |p|
-      pipe_auth = (@pipe_auth_tokens[p.uuid] ||= ApiClientAuthorization.
-                   create(user: User.where('uuid=?', p.modified_by_user_uuid).first,
-                          api_client_id: 0))
-      puts `export ARVADOS_API_TOKEN=#{pipe_auth.api_token} && arv-run-pipeline-instance --run-pipeline-here --no-wait --instance #{p.uuid}`
-      expire_tokens.delete p.uuid
-    end
-
-    expire_tokens.each do |k, v|
-      v.update_attributes expires_at: Time.now
-      @pipe_auth_tokens.delete k
-    end
-  end
-
-  def parse_argv argv
-    @runoptions = {}
-    (argv.any? ? argv : ['--jobs', '--pipelines']).each do |arg|
-      case arg
-      when '--jobs'
-        @runoptions[:jobs] = true
-      when '--pipelines'
-        @runoptions[:pipelines] = true
-      else
-        abort "Unrecognized command line option '#{arg}'"
-      end
-    end
-    if not (@runoptions[:jobs] or @runoptions[:pipelines])
-      abort "Nothing to do. Please specify at least one of: --jobs, --pipelines."
-    end
-  end
-
-  def run argv
-    parse_argv argv
-
-    # We want files written by crunch-dispatch to be writable by other
-    # processes with the same GID, see bug #7228
-    File.umask(0002)
-
-    # This is how crunch-job child procs know where the "refresh"
-    # trigger file is
-    ENV["CRUNCH_REFRESH_TRIGGER"] = Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger
-
-    # If salloc can't allocate resources immediately, make it use our
-    # temporary failure exit code.  This ensures crunch-dispatch won't
-    # mark a job failed because of an issue with node allocation.
-    # This often happens when another dispatcher wins the race to
-    # allocate nodes.
-    ENV["SLURM_EXIT_IMMEDIATE"] = CrunchDispatch::EXIT_TEMPFAIL.to_s
-
-    if ENV["CRUNCH_DISPATCH_LOCKFILE"]
-      lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
-      lockfile = File.open(lockfilename, File::RDWR|File::CREAT, 0644)
-      unless lockfile.flock File::LOCK_EX|File::LOCK_NB
-        abort "Lock unavailable on #{lockfilename} - exit"
-      end
-    end
-
-    @signal = {}
-    %w{TERM INT}.each do |sig|
-      signame = sig
-      Signal.trap(sig) do
-        $stderr.puts "Received #{signame} signal"
-        @signal[:term] = true
-      end
-    end
-
-    act_as_system_user
-    User.first.group_permissions
-    $stderr.puts "dispatch: ready"
-    while !@signal[:term] or @running.size > 0
-      read_pipes
-      if @signal[:term]
-        @running.each do |uuid, j|
-          if !j[:started] and j[:sent_int] < 2
-            begin
-              Process.kill 'INT', j[:wait_thr].pid
-            rescue Errno::ESRCH
-              # No such pid = race condition + desired result is
-              # already achieved
-            end
-            j[:sent_int] += 1
-          end
-        end
-      else
-        refresh_todo unless did_recently(:refresh_todo, 1.0)
-        update_node_status unless did_recently(:update_node_status, 1.0)
-        unless @todo.empty? or did_recently(:start_jobs, 1.0) or @signal[:term]
-          start_jobs
-        end
-        unless (@todo_pipelines.empty? and @pipe_auth_tokens.empty?) or did_recently(:update_pipelines, 5.0)
-          update_pipelines
-        end
-        unless did_recently('check_orphaned_slurm_jobs', 60)
-          check_orphaned_slurm_jobs
-        end
-      end
-      reap_children
-      select(@running.values.collect { |j| [j[:stdout], j[:stderr]] }.flatten,
-             [], [], 1)
-    end
-    # If there are jobs we wanted to retry, we have to mark them as failed now.
-    # Other dispatchers can't pick them up because we hold their lock.
-    @todo_job_retries.each_key do |job_uuid|
-      job = Job.find_by_uuid(job_uuid)
-      if job.state == "Running"
-        fail_job(job, "crunch-dispatch was stopped during job's tempfail retry loop")
-      end
-    end
-  end
-
-  def fail_jobs before: nil
-    act_as_system_user do
-      threshold = nil
-      if before == 'reboot'
-        boottime = nil
-        open('/proc/stat').map(&:split).each do |stat, t|
-          if stat == 'btime'
-            boottime = t
-          end
-        end
-        if not boottime
-          raise "Could not find btime in /proc/stat"
-        end
-        threshold = Time.at(boottime.to_i)
-      elsif before
-        threshold = Time.parse(before, Time.now)
-      else
-        threshold = db_current_time
-      end
-      Rails.logger.info "fail_jobs: threshold is #{threshold}"
-
-      squeue = squeue_jobs
-      Job.where('state = ? and started_at < ?', Job::Running, threshold).
-        each do |job|
-        Rails.logger.debug "fail_jobs: #{job.uuid} started #{job.started_at}"
-        squeue.each do |slurm_name|
-          if slurm_name == job.uuid
-            Rails.logger.info "fail_jobs: scancel #{job.uuid}"
-            scancel slurm_name
-          end
-        end
-        fail_job(job, "cleaned up stale job: started before #{threshold}",
-                 skip_lock: true)
-      end
-    end
-  end
-
-  def check_orphaned_slurm_jobs
-    act_as_system_user do
-      squeue_uuids = squeue_jobs.select{|uuid| uuid.match(/^[0-9a-z]{5}-8i9sb-[0-9a-z]{15}$/)}.
-                                  select{|uuid| !@running.has_key?(uuid)}
-
-      return if squeue_uuids.size == 0
-
-      scancel_uuids = squeue_uuids - Job.where('uuid in (?) and (state in (?) or modified_at>?)',
-                                               squeue_uuids,
-                                               ['Running', 'Queued'],
-                                               (Time.now - 60)).
-                                         collect(&:uuid)
-      scancel_uuids.each do |uuid|
-        Rails.logger.info "orphaned job: scancel #{uuid}"
-        scancel uuid
-      end
-    end
-  end
-
-  def sudo_preface
-    return [] if not Rails.configuration.Containers.JobsAPI.CrunchJobUser
-    ["sudo", "-E", "-u",
-     Rails.configuration.Containers.JobsAPI.CrunchJobUser,
-     "LD_LIBRARY_PATH=#{ENV['LD_LIBRARY_PATH']}",
-     "PATH=#{ENV['PATH']}",
-     "PERLLIB=#{ENV['PERLLIB']}",
-     "PYTHONPATH=#{ENV['PYTHONPATH']}",
-     "RUBYLIB=#{ENV['RUBYLIB']}",
-     "GEM_PATH=#{ENV['GEM_PATH']}"]
-  end
-
-  protected
-
-  def have_job_lock?(job)
-    # Return true if the given job is locked by this crunch-dispatch, normally
-    # because we've run crunch-job for it.
-    @todo_job_retries.include?(job.uuid)
-  end
-
-  def did_recently(thing, min_interval)
-    if !@did_recently[thing] or @did_recently[thing] < Time.now - min_interval
-      @did_recently[thing] = Time.now
-      false
-    else
-      true
-    end
-  end
-
-  # send message to log table. we want these records to be transient
-  def write_log running_job
-    return if running_job[:stderr_buf_to_flush] == ''
-
-    # Send out to log event if buffer size exceeds the bytes per event or if
-    # it has been at least crunch_log_seconds_between_events seconds since
-    # the last flush.
-    if running_job[:stderr_buf_to_flush].size > Rails.configuration.Containers.Logging.LogBytesPerEvent or
-        (Time.now - running_job[:stderr_flushed_at]) >= Rails.configuration.Containers.Logging.LogSecondsBetweenEvents
-      begin
-        log = Log.new(object_uuid: running_job[:job].uuid,
-                      event_type: 'stderr',
-                      owner_uuid: running_job[:job].owner_uuid,
-                      properties: {"text" => running_job[:stderr_buf_to_flush]})
-        log.save!
-        running_job[:events_logged] += 1
-      rescue => exception
-        $stderr.puts "Failed to write logs"
-        $stderr.puts exception.backtrace
-      end
-      running_job[:stderr_buf_to_flush] = ''
-      running_job[:stderr_flushed_at] = Time.now
-    end
-  end
-
-  # An array of job_uuids in squeue
-  def squeue_jobs
-    if Rails.configuration.Containers.JobsAPI.CrunchJobWrapper == "slurm_immediate"
-      p = IO.popen(['squeue', '-a', '-h', '-o', '%j'])
-      begin
-        p.readlines.map {|line| line.strip}
-      ensure
-        p.close
-      end
-    else
-      []
-    end
-  end
-
-  def scancel slurm_name
-    cmd = sudo_preface + ['scancel', '-n', slurm_name]
-    IO.popen(cmd) do |scancel_pipe|
-      puts scancel_pipe.read
-    end
-    if not $?.success?
-      Rails.logger.error "scancel #{slurm_name.shellescape}: #{$?}"
-    end
-  end
-end
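
One self-contained piece of the deleted dispatcher is the hostname-range expansion in each_slurm_line, which turns compact sinfo output such as "compute[1-3,5,9-12]:idle" into one row per host for SLURM versions older than 2.3. A standalone sketch of that expansion, reusing the same regex but with no SLURM or Rails dependency (the method name is just for illustration):

    # Sketch only: expand a SLURM-style bracketed hostname range into
    # individual hostnames, following the regex the deleted
    # CrunchDispatch#each_slurm_line used for pre-2.3 sinfo output.
    def expand_hostnames(token)
      if (m = token.match(/^(.*?)\[([-,\d]+)\]$/))
        prefix, ranges = m[1], m[2]
        ranges.split(',').flat_map do |range|
          lo, hi = range.split('-').map(&:to_i)
          hi ||= lo
          (lo..hi).map { |n| "#{prefix}#{n}" }
        end
      else
        [token]
      end
    end

    p expand_hostnames('compute[1-3,5,9-12]')
    # => ["compute1", "compute2", "compute3", "compute5",
    #     "compute9", "compute10", "compute11", "compute12"]
    p expand_hostnames('compute0')  # => ["compute0"]
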
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
deleted file mode 100755
index 38bd54b5c..000000000
--- a/services/api/script/crunch-dispatch.rb
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-dispatch_argv = []
-ARGV.reject! do |arg|
-  dispatch_argv.push(arg) if /^--/ =~ arg
-end
-
-ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-require './lib/crunch_dispatch.rb'
-
-CrunchDispatch.new.run dispatch_argv
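
The deleted wrapper above leans on a compact idiom: ARGV.reject! both strips every "--" flag out of ARGV and collects it into dispatch_argv, because Array#push returns a truthy value. A small sketch of the same split with a made-up argument list:

    # Sketch only: split "--" flags from positional arguments the way the
    # deleted script/crunch-dispatch.rb did. The sample argv is made up.
    argv = ['production', '--jobs', '--pipelines']

    dispatch_argv = []
    argv.reject! do |arg|
      # Array#push returns the (truthy) array, so reject! drops every
      # "--" flag from argv while dispatch_argv accumulates them.
      dispatch_argv.push(arg) if /^--/ =~ arg
    end

    p dispatch_argv  # => ["--jobs", "--pipelines"]
    p argv           # => ["production"]
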
diff --git a/services/api/script/crunch_failure_report.py b/services/api/script/crunch_failure_report.py
deleted file mode 100755
index 83217d851..000000000
--- a/services/api/script/crunch_failure_report.py
+++ /dev/null
@@ -1,222 +0,0 @@
-#! /usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-import argparse
-import datetime
-import json
-import re
-import sys
-
-import arvados
-
-# Useful configuration variables:
-
-# Number of log lines to use as context in diagnosing failure.
-LOG_CONTEXT_LINES = 10
-
-# Regex that signifies a failed task.
-FAILED_TASK_REGEX = re.compile(' \d+ failure (.*permanent)')
-
-# Regular expressions used to classify failure types.
-JOB_FAILURE_TYPES = {
-    'sys/docker': 'Cannot destroy container',
-    'crunch/node': 'User not found on host',
-    'slurm/comm':  'Communication connection failure'
-}
-
-def parse_arguments(arguments):
-    arg_parser = argparse.ArgumentParser(
-        description='Produce a report of Crunch failures within a specified time range')
-
-    arg_parser.add_argument(
-        '--start',
-        help='Start date and time')
-    arg_parser.add_argument(
-        '--end',
-        help='End date and time')
-
-    args = arg_parser.parse_args(arguments)
-
-    if args.start and not is_valid_timestamp(args.start):
-        raise ValueError(args.start)
-    if args.end and not is_valid_timestamp(args.end):
-        raise ValueError(args.end)
-
-    return args
-
-
-def api_timestamp(when=None):
-    """Returns a string representing the timestamp 'when' in a format
-    suitable for delivering to the API server.  Defaults to the
-    current time.
-    """
-    if when is None:
-        when = datetime.datetime.utcnow()
-    return when.strftime("%Y-%m-%dT%H:%M:%SZ")
-
-
-def is_valid_timestamp(ts):
-    return re.match(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z', ts)
-
-
-def jobs_created_between_dates(api, start, end):
-    return arvados.util.list_all(
-        api.jobs().list,
-        filters=json.dumps([ ['created_at', '>=', start],
-                             ['created_at', '<=', end] ]))
-
-
-def job_logs(api, job):
-    # Returns the contents of the log for this job (as an array of lines).
-    if job['log']:
-        log_collection = arvados.CollectionReader(job['log'], api)
-        log_filename = "{}.log.txt".format(job['uuid'])
-        return log_collection.open(log_filename).readlines()
-    return []
-
-
-user_names = {}
-def job_user_name(api, user_uuid):
-    def _lookup_user_name(api, user_uuid):
-        try:
-            return api.users().get(uuid=user_uuid).execute()['full_name']
-        except arvados.errors.ApiError:
-            return user_uuid
-
-    if user_uuid not in user_names:
-        user_names[user_uuid] = _lookup_user_name(api, user_uuid)
-    return user_names[user_uuid]
-
-
-job_pipeline_names = {}
-def job_pipeline_name(api, job_uuid):
-    def _lookup_pipeline_name(api, job_uuid):
-        try:
-            pipelines = api.pipeline_instances().list(
-                filters='[["components", "like", "%{}%"]]'.format(job_uuid)).execute()
-            pi = pipelines['items'][0]
-            if pi['name']:
-                return pi['name']
-            else:
-                # Use the pipeline template name
-                pt = api.pipeline_templates().get(uuid=pi['pipeline_template_uuid']).execute()
-                return pt['name']
-        except (TypeError, ValueError, IndexError):
-            return ""
-
-    if job_uuid not in job_pipeline_names:
-        job_pipeline_names[job_uuid] = _lookup_pipeline_name(api, job_uuid)
-    return job_pipeline_names[job_uuid]
-
-
-def is_failed_task(logline):
-    return FAILED_TASK_REGEX.search(logline) != None
-
-
-def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
-    args = parse_arguments(arguments)
-
-    api = arvados.api('v1')
-
-    now = datetime.datetime.utcnow()
-    start_time = args.start or api_timestamp(now - datetime.timedelta(days=1))
-    end_time = args.end or api_timestamp(now)
-
-    # Find all jobs created within the specified window,
-    # and their corresponding job logs.
-    jobs_created = jobs_created_between_dates(api, start_time, end_time)
-    jobs_by_state = {}
-    for job in jobs_created:
-        jobs_by_state.setdefault(job['state'], [])
-        jobs_by_state[job['state']].append(job)
-
-    # Find failed jobs and record the job failure text.
-
-    # failure_stats maps failure types (e.g. "sys/docker") to
-    # a set of job UUIDs that failed for that reason.
-    failure_stats = {}
-    for job in jobs_by_state['Failed']:
-        job_uuid = job['uuid']
-        logs = job_logs(api, job)
-        # Find the first permanent task failure, and collect the
-        # preceding log lines.
-        failure_type = None
-        for i, lg in enumerate(logs):
-            if is_failed_task(lg):
-                # Get preceding log record to provide context.
-                log_start = i - LOG_CONTEXT_LINES if i >= LOG_CONTEXT_LINES else 0
-                log_end = i + 1
-                lastlogs = ''.join(logs[log_start:log_end])
-                # try to identify the type of failure.
-                for key, rgx in JOB_FAILURE_TYPES.iteritems():
-                    if re.search(rgx, lastlogs):
-                        failure_type = key
-                        break
-            if failure_type is not None:
-                break
-        if failure_type is None:
-            failure_type = 'unknown'
-        failure_stats.setdefault(failure_type, set())
-        failure_stats[failure_type].add(job_uuid)
-
-    # Report percentages of successful, failed and unfinished jobs.
-    print "Start: {:20s}".format(start_time)
-    print "End:   {:20s}".format(end_time)
-    print ""
-
-    print "Overview"
-    print ""
-
-    job_start_count = len(jobs_created)
-    print "  {: <25s} {:4d}".format('Started', job_start_count)
-    for state in ['Complete', 'Failed', 'Queued', 'Cancelled', 'Running']:
-        if state in jobs_by_state:
-            job_count = len(jobs_by_state[state])
-            job_percentage = job_count / float(job_start_count)
-            print "  {: <25s} {:4d} ({: >4.0%})".format(state,
-                                                        job_count,
-                                                        job_percentage)
-    print ""
-
-    # Report failure types.
-    failure_summary = ""
-    failure_detail = ""
-
-    # Generate a mapping from failed job uuids to job records, to assist
-    # in generating detailed statistics for job failures.
-    jobs_failed_map = { job['uuid']: job for job in jobs_by_state.get('Failed', []) }
-
-    # sort the failure stats in descending order by occurrence.
-    sorted_failures = sorted(failure_stats,
-                             reverse=True,
-                             key=lambda failure_type: len(failure_stats[failure_type]))
-    for failtype in sorted_failures:
-        job_uuids = failure_stats[failtype]
-        failstat = "  {: <25s} {:4d} ({: >4.0%})\n".format(
-            failtype,
-            len(job_uuids),
-            len(job_uuids) / float(len(jobs_by_state['Failed'])))
-        failure_summary = failure_summary + failstat
-        failure_detail = failure_detail + failstat
-        for j in job_uuids:
-            job_info = jobs_failed_map[j]
-            job_owner = job_user_name(api, job_info['modified_by_user_uuid'])
-            job_name = job_pipeline_name(api, job_info['uuid'])
-            failure_detail = failure_detail + "    {}  {: <15.15s}  {:29.29s}\n".format(j, job_owner, job_name)
-        failure_detail = failure_detail + "\n"
-
-    print "Failures by class"
-    print ""
-    print failure_summary
-
-    print "Failures by class (detail)"
-    print ""
-    print failure_detail
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
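
The deleted report classifies each failed job by matching the log lines around the first permanent task failure against a small table of regexes, then tallies jobs per failure class. A compact Ruby sketch of that classify-and-tally step (the patterns mirror JOB_FAILURE_TYPES; the sample log lines are made up):

    # Sketch only: classify log excerpts by the first matching pattern and
    # count jobs per failure class, as crunch_failure_report.py did. The
    # patterns mirror its JOB_FAILURE_TYPES; the sample lines are made up.
    FAILURE_TYPES = {
      'sys/docker'  => /Cannot destroy container/,
      'crunch/node' => /User not found on host/,
      'slurm/comm'  => /Communication connection failure/,
    }.freeze

    def classify(log_excerpt)
      FAILURE_TYPES.each { |name, rx| return name if log_excerpt =~ rx }
      'unknown'
    end

    logs = [
      'stderr Cannot destroy container abc123',
      'stderr slurmd: Communication connection failure',
      'stderr something else entirely',
    ]

    tally = Hash.new(0)
    logs.each { |line| tally[classify(line)] += 1 }
    p tally  # => {"sys/docker"=>1, "slurm/comm"=>1, "unknown"=>1}
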
diff --git a/services/api/script/fail-jobs.rb b/services/api/script/fail-jobs.rb
deleted file mode 100755
index e52bfc075..000000000
--- a/services/api/script/fail-jobs.rb
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-require 'optimist'
-
-opts = Optimist::options do
-  banner 'Fail jobs that have state=="Running".'
-  banner 'Options:'
-  opt(:before,
-      'fail only jobs that started before the given time (or "reboot")',
-      type: :string)
-end
-
-ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-require Rails.root.join('lib/crunch_dispatch.rb')
-
-CrunchDispatch.new.fail_jobs before: opts[:before]

-----------------------------------------------------------------------


hooks/post-receive
-- 



