[ARVADOS] created: fb5d279228a7adcafd858d9e137accd8010bb382
Git user
git at public.curoverse.com
Thu May 11 11:38:47 EDT 2017
at fb5d279228a7adcafd858d9e137accd8010bb382 (commit)
commit fb5d279228a7adcafd858d9e137accd8010bb382
Author: Tom Clegg <tom at curoverse.com>
Date: Thu May 11 11:38:37 2017 -0400
11590: Add "reuse jobs even if conflicting outputs exist" and "log reuse decisions" config options.
diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb
index 90fba3c..51f2df4 100644
--- a/services/api/app/models/job.rb
+++ b/services/api/app/models/job.rb
@@ -198,6 +198,16 @@ class Job < ArvadosModel
filters
end
+ # reuselog logs whatever the given block returns, if reuse logging
+ # is enabled. It accepts a block instead of a string because in some
+ # cases constructing the strings involves doing database queries,
+ # and we want to skip those queries when logging is disabled.
+ def self.reuselog
+ if Rails.configuration.log_reuse_decisions
+ Rails.logger.info("find_reusable: " + yield)
+ end
+ end
+
def self.find_reusable attrs, params, filters, read_users
if filters.empty? # Translate older creation parameters into filters.
filters =
@@ -237,32 +247,57 @@ class Job < ArvadosModel
end
# Search for a reusable Job, and return it if found.
- candidates = Job.
- readable_by(current_user).
- where('state = ? or (owner_uuid = ? and state in (?))',
- Job::Complete, current_user.uuid, [Job::Queued, Job::Running]).
- where('script_parameters_digest = ?', Job.sorted_hash_digest(attrs[:script_parameters])).
- where('nondeterministic is distinct from ?', true).
- order('state desc, created_at') # prefer Running jobs over Queued
+ candidates = Job.readable_by(current_user).where(
+ 'state = ? or (owner_uuid = ? and state in (?))',
+ Job::Complete, current_user.uuid, [Job::Queued, Job::Running])
+ reuselog { "have #{candidates.count} readable jobs in a reusable state" }
+
+ digest = Job.sorted_hash_digest(attrs[:script_parameters])
+ candidates = candidates.where('script_parameters_digest = ?', digest)
+ reuselog { "have #{candidates.count} candidates after filtering on script_parameters_digest #{digest}" }
+
+ candidates = candidates.where('nondeterministic is distinct from ?', true)
+ reuselog { "have #{candidates.count} candidates after filtering on !nondeterministic" }
+
+ # prefer Running jobs over Queued
+ candidates = candidates.order('state desc, created_at')
+
candidates = apply_filters candidates, filters
+ reuselog { "have #{candidates.count} candidates after filtering on repo, script, and custom filters #{filters.inspect}" }
+
chosen = nil
incomplete_job = nil
candidates.each do |j|
if j.state != Job::Complete
- # We'll use this if we don't find a job that has completed
- incomplete_job ||= j
- next
- end
-
- if chosen == false
- # We have already decided not to reuse any completed job
- next
+ if !incomplete_job
+ # We'll use this if we don't find a job that has completed
+ reuselog { "job #{j.uuid} is reusable, but unfinished; continuing search for completed jobs" }
+ incomplete_job = j
+ else
+ reuselog { "job #{j.uuid} is reusable, but unfinished, so not better than #{incomplete_job.uuid}; ignoring" }
+ end
+ elsif chosen == false
+ # Ignore: we have already decided not to reuse any completed
+ # job.
+ reuselog { "job #{j.uuid} output #{j.output} ignored, see above" }
+ elsif Rails.configuration.reuse_job_if_outputs_differ
+ if Collection.readable_by(current_user).find_by_portable_data_hash(j.output)
+ reuselog { "job #{j.uuid} with output #{j.output} is reusable; decision is final." }
+ return j
+ else
+ # Ignore: keep locking for an incomplete job or one whose
+ # output is readable.
+ reuselog { "job #{j.uuid} output #{j.output} unavailable to user; continuing search" }
+ end
elsif chosen
if chosen.output != j.output
# If two matching jobs produced different outputs, run a new
# job (or use one that's already running/queued) instead of
# choosing one arbitrarily.
+ reuselog { "job #{j.uuid} output #{j.output} disagrees; forgetting about #{chosen.uuid} and ignoring any other finished jobs (see reuse_job_if_outputs_differ in application.default.yml)" }
chosen = false
+ else
+ reuselog { "job #{j.uuid} output #{j.output} agrees with chosen #{chosen.uuid}; continuing search in case others disagree" }
end
# ...and that's the only thing we need to do once we've chosen
# a job to reuse.
@@ -270,12 +305,20 @@ class Job < ArvadosModel
# As soon as the output we will end up returning (if any) is
# decided, check whether it will be visible to the user; if
# not, any further investigation of reusable jobs is futile.
+ reuselog { "job #{j.uuid} output #{j.output} unavailable to user; now refusing to reuse any finished job" }
chosen = false
else
+ reuselog { "job #{j.uuid} output #{j.output} can be reused; continuing search in case others disagree" }
chosen = j
end
end
- chosen || incomplete_job
+ j = chosen || incomplete_job
+ if j
+ reuselog { "done, #{j.uuid} was selected" }
+ else
+ reuselog { "done, nothing suitable" }
+ end
+ return j
end
def self.default_git_filters(attr_name, repo_name, refspec)
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index 32c7b24..c2f8958 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -358,6 +358,27 @@ common:
###
+ ### Job and container reuse logic.
+ ###
+
+ # Include details about job reuse decisions in the server log. This
+ # causes additional database queries to run, so it should not be
+ # enabled unless you expect to examine the resulting logs for
+ # troubleshooting purposes.
+ log_reuse_decisions: false
+
+ # Control job reuse behavior when two completed jobs match the
+ # search criteria and have different outputs.
+ #
+ # If true, in case of a conflict, reuse the earliest job (this is
+ # similar to container reuse behavior).
+ #
+ # If false, in case of a conflict, do not reuse any completed job,
+ # but do reuse an already-running job if available (this is the
+ # original job reuse behavior, and is still the default).
+ reuse_job_if_outputs_differ: false
+
+ ###
### Remaining assorted configuration options.
###
diff --git a/services/api/test/unit/job_test.rb b/services/api/test/unit/job_test.rb
index 9241465..e0a4f84 100644
--- a/services/api/test/unit/job_test.rb
+++ b/services/api/test/unit/job_test.rb
@@ -557,7 +557,18 @@ class JobTest < ActiveSupport::TestCase
assert_equal Job.deep_sort_hash(a).to_json, Job.deep_sort_hash(b).to_json
end
- test 'find_reusable' do
+ test 'find_reusable without logging' do
+ Rails.logger.expects(:info).never
+ try_find_reusable
+ end
+
+ test 'find_reusable with logging' do
+ Rails.configuration.log_reuse_decisions = true
+ Rails.logger.expects(:info).at_least(3)
+ try_find_reusable
+ end
+
+ def try_find_reusable
foobar = jobs(:foobar)
example_attrs = {
script_version: foobar.script_version,
@@ -577,6 +588,11 @@ class JobTest < ActiveSupport::TestCase
Job.where(uuid: jobs(:job_with_latest_version).uuid).
update_all(output: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1')
assert_nil Job.find_reusable(example_attrs, {}, [], [users(:active)])
+
+ # ...unless config says to reuse the earlier job in such cases.
+ Rails.configuration.reuse_job_if_outputs_differ = true
+ j = Job.find_reusable(example_attrs, {}, [], [users(:active)])
+ assert_equal foobar.uuid, j.uuid
end
[
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list