[ARVADOS] created: 1d135e43d06148b100fbc8534144b405e4824af9
git at public.curoverse.com
git at public.curoverse.com
Thu Sep 25 15:32:04 EDT 2014
at 1d135e43d06148b100fbc8534144b405e4824af9 (commit)
commit 1d135e43d06148b100fbc8534144b405e4824af9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 25 15:24:52 2014 -0400
3988: Add note to Job.state. Replace logic to compute state based on
success/running columns with the same logic as used on apiserver.
diff --git a/apps/workbench/app/models/job.rb b/apps/workbench/app/models/job.rb
index d3d38b0..f933c07 100644
--- a/apps/workbench/app/models/job.rb
+++ b/apps/workbench/app/models/job.rb
@@ -46,20 +46,23 @@ class Job < ArvadosBase
arvados_api_client.unpack_api_response arvados_api_client.api("jobs/", "queue", {"_method"=> "GET"})
end
+ # The 'job' parameter can be either a Job model object, or a hash containing
+ # the same fields as a Job object (such as the :job entry of a pipeline
+ # component).
def self.state job
+ # This has a valid state method on it so call that
if job.respond_to? :state and job.state
return job.state
end
- if not job[:cancelled_at].nil?
+ # Figure out the state based on the other fields.
+ if job[:cancelled_at]
"Cancelled"
- elsif not job[:finished_at].nil? or not job[:success].nil?
- if job[:success]
- "Completed"
- else
- "Failed"
- end
- elsif job[:running]
+ elsif job[:success] == false
+ "Failed"
+ elsif job[:success] == true
+ "Complete"
+ elsif job[:running] == true
"Running"
else
"Queued"
commit 2861857a87d4c40924f783862ee09e91cec0b96f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 25 15:01:44 2014 -0400
3899: Shorten count of ended/succeeded/failed in a-r-p-i with some clever Ruby.
Improve reporting to test for state == Cancelled instead of looking at
cancelled_at, and distinguish between the job being cancelled and the state
being unexpectedly changed to success/failed. pipeline_instances_helper uses
the state attribute.
diff --git a/apps/workbench/app/helpers/pipeline_instances_helper.rb b/apps/workbench/app/helpers/pipeline_instances_helper.rb
index 7e5324b..ca48306 100644
--- a/apps/workbench/app/helpers/pipeline_instances_helper.rb
+++ b/apps/workbench/app/helpers/pipeline_instances_helper.rb
@@ -132,19 +132,25 @@ module PipelineInstancesHelper
pj[:progress] = 0.0
end
end
- if pj[:job][:success]
+
+ case pj[:job][:state]
+ when 'Complete'
pj[:result] = 'complete'
pj[:labeltype] = 'success'
pj[:complete] = true
pj[:progress] = 1.0
- elsif pj[:job][:finished_at]
+ when 'Failed'
pj[:result] = 'failed'
pj[:labeltype] = 'danger'
pj[:failed] = true
- elsif pj[:job][:started_at]
+ when 'Cancelled'
+ pj[:result] = 'cancelled'
+ pj[:labeltype] = 'danger'
+ pj[:failed] = true
+ when 'Running'
pj[:result] = 'running'
pj[:labeltype] = 'primary'
- elsif pj[:job][:uuid]
+ when 'Queued'
pj[:result] = 'queued'
pj[:labeltype] = 'default'
else
diff --git a/apps/workbench/app/views/application/_job_status_label.html.erb b/apps/workbench/app/views/application/_job_status_label.html.erb
index ece8167..0d012db 100644
--- a/apps/workbench/app/views/application/_job_status_label.html.erb
+++ b/apps/workbench/app/views/application/_job_status_label.html.erb
@@ -1,4 +1,4 @@
-<% status = Job.state j %>
+<% status = Job::state j %>
<% to_label = {
"Cancelled" => "danger",
"Complete" => "success",
diff --git a/apps/workbench/app/views/pipeline_instances/_running_component.html.erb b/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
index 1d52e28..be3aba8 100644
--- a/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
+++ b/apps/workbench/app/views/pipeline_instances/_running_component.html.erb
@@ -31,7 +31,7 @@
<% end %>
</div>
- <% if Job::state(current_job).in? ["Complete", "Failed", "Canceled"] %>
+ <% if Job::state(current_job).in? ["Complete", "Failed", "Cancelled"] %>
<div class="col-md-5 text-overflow-ellipsis">
<% if pj[:output_uuid] %>
<%= link_to_if_arvados_object pj[:output_uuid], friendly_name: true %>
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index 040a71d..ded7ab1 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -686,17 +686,12 @@ class WhRunPipelineInstance
end
end
- ended = @components.map { |cname, c|
- if c[:job] and ["Complete", "Failed", "Cancelled"].include? c[:job][:state] then 1 else 0 end
- }.reduce(:+) || 0
-
- succeeded = @components.map { |cname, c|
- if c[:job] and ["Complete"].include? c[:job][:state] then 1 else 0 end
- }.reduce(:+) || 0
-
- failed = @components.map { |cname, c|
- if c[:job] and ["Failed", "Cancelled"].include? c[:job][:state] then 1 else 0 end
- }.reduce(:+) || 0
+ c_in_state = @components.values.group_by { |c|
+ c[:job] and c[:job][:state]
+ }
+ succeeded = c_in_state["Complete"].count
+ failed = c_in_state["Failed"].count + c_in_state["Cancelled"].count
+ ended = succeeded + failed
success = (succeeded == @components.length)
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index fbd7bef..f56099d 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1042,8 +1042,11 @@ sub check_refresh_wanted
$Job->{$attr} = $Job2->{$attr};
}
if ($Job->{'state'} ne "Running") {
- Log (undef, "Job state changed to " . $Job->{'state'} . " at " . $Job->{cancelled_at} .
- " by user " . $Job->{cancelled_by_user_uuid});
+ if ($Job->{'state'} eq "Cancelled") {
+ Log (undef, "Job cancelled at " . $Job->{'cancelled_at'} . " by user " . $Job->{'cancelled_by_user_uuid'});
+ } else {
+ Log (undef, "Job state unexpectedly changed to " . $Job->{'state'});
+ }
$main::success = 0;
$main::please_freeze = 1;
}
@@ -1341,9 +1344,8 @@ sub croak
sub cleanup
{
return if !$job_has_uuid;
- if ($Job->{'cancelled_at'}) {
- $Job->update_attributes('state' => 'Cancelled',
- 'finished_at' => scalar gmtime);
+ if ($Job->{'state'} eq 'Cancelled') {
+ $Job->update_attributes('finished_at' => scalar gmtime);
} else {
$Job->update_attributes('state' => 'Failed');
}
commit 9daebff7dfeaa7092da078f3a0bfdc0c9d8e51e0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 25 09:16:14 2014 -0400
3988: Job.queue uses "state = Queued" instead of previous multi-column-null test.
diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb
index b4aa625..1bf17cc 100644
--- a/services/api/app/models/job.rb
+++ b/services/api/app/models/job.rb
@@ -64,9 +64,7 @@ class Job < ArvadosModel
end
def self.queue
- self.where('started_at is ? and is_locked_by_uuid is ? and cancelled_at is ? and success is ?',
- nil, nil, nil, nil).
- order('priority desc, created_at')
+ self.where('state = ?', Queued).order('priority desc, created_at')
end
def queue_position
commit 3d84dd62a85647e9b3dfc34af8303c09b9923498
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 24 14:36:21 2014 -0400
3899: crunch-dispatch uses state column to mark crashed jobs as failed.
diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb
index 08ea229..69c2d57 100755
--- a/services/api/script/crunch-dispatch.rb
+++ b/services/api/script/crunch-dispatch.rb
@@ -417,14 +417,11 @@ class Dispatcher
exit_status = j_done[:wait_thr].value
jobrecord = Job.find_by_uuid(job_done.uuid)
- if exit_status.to_i != 75 and jobrecord.started_at
- # Clean up state fields in case crunch-job exited without
- # putting the job in a suitable "finished" state.
- jobrecord.running = false
- jobrecord.finished_at ||= Time.now
- if jobrecord.success.nil?
- jobrecord.success = false
- end
+ if exit_status.to_i != 75 and jobrecord.state == "Running"
+ # crunch-job did not return exit code 75 (see below) and left the job in
+ # the "Running" state, which means there was an unhandled error. Fail
+ # the job.
+ jobrecord.state = "Failed"
jobrecord.save!
else
# Don't fail the job if crunch-job didn't even get as far as
commit 93c7127afd21f82662e9b471e7c617ae8c59ad19
Merge: 68dc1fc 2a8d349
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 24 14:21:50 2014 -0400
Merge remote-tracking branch 'origin/master' into 3988-crunch-use-job-state
commit 68dc1fcaec00d7d954be19f2c184e04626639616
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 24 14:11:33 2014 -0400
3899: Fix syntax errors in arv-run-pipeline-instance, and update job record if
job is queued or running.
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index bc87c5d..040a71d 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -509,7 +509,7 @@ class WhRunPipelineInstance
# the job's current state")
c_already_finished = (c[:job] &&
c[:job][:uuid] &&
- ["Complete", "Failed", "Cancelled"].include? c[:job][:state])
+ ["Complete", "Failed", "Cancelled"].include?(c[:job][:state]))
if !c[:job] and
c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty?
# No job yet associated with this component and is component inputs
@@ -526,7 +526,7 @@ class WhRunPipelineInstance
:owner_uuid => owner_uuid,
:is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil),
:submit_id => my_submit_id,
- :state => (if @options[:run_jobs_here] then "Running" else "Queued")
+ :state => (if @options[:run_jobs_here] then "Running" else "Queued" end)
}, {
# This is the right place to put these attributes when
# dealing with new API servers.
@@ -582,8 +582,8 @@ class WhRunPipelineInstance
end
if c[:job] and c[:job][:uuid]
- if c[:job][:state] == "Running"
- # Job is running so update copy of job record
+ if ["Running", "Queued"].include?(c[:job][:state])
+ # Job is running (or may be soon) so update copy of job record
c[:job] = JobCache.get(c[:job][:uuid])
end
@@ -655,8 +655,11 @@ class WhRunPipelineInstance
elsif c[:job][:state] == "Running"
# Job is still running
moretodo = true
- elsif c[:job][:cancelled_at]
+ elsif c[:job][:state] == "Cancelled"
debuglog "component #{cname} job #{c[:job][:uuid]} cancelled."
+ moretodo = false
+ elsif c[:job][:state] == "Failed"
+ moretodo = false
end
end
end
@@ -759,16 +762,18 @@ class WhRunPipelineInstance
@components.each do |cname, c|
jstatus = if !c[:job]
"-"
- elsif c[:job][:state] == "Running"
- "#{c[:job][:tasks_summary].inspect}"
- elsif c[:job][:state] == "Complete"
- c[:job][:output]
- elsif c[:job][:state] == "Cancelled"
- "cancelled #{c[:job][:cancelled_at]}"
- elsif c[:job][:state] == "Failed"
- "failed #{c[:job][:finished_at]}"
- elsif c[:job][:state] == "Queued"
- "queued #{c[:job][:created_at]}"
+ else case c[:job][:state]
+ when "Running"
+ "#{c[:job][:tasks_summary].inspect}"
+ when "Complete"
+ c[:job][:output]
+ when "Cancelled"
+ "cancelled #{c[:job][:cancelled_at]}"
+ when "Failed"
+ "failed #{c[:job][:finished_at]}"
+ when "Queued"
+ "queued #{c[:job][:created_at]}"
+ end
end
f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}"
end
commit 4c53b3aee935b41e11c71ff1468a10d6a80c390c
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 24 13:32:01 2014 -0400
3988: look at job state instead of cancelled_at to determine if the job should
be stopped.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 3dd1627..fbd7bef 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -1037,11 +1037,12 @@ sub check_refresh_wanted
my $Job2 = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
for my $attr ('cancelled_at',
'cancelled_by_user_uuid',
- 'cancelled_by_client_uuid') {
+ 'cancelled_by_client_uuid',
+ 'state') {
$Job->{$attr} = $Job2->{$attr};
}
- if ($Job->{'cancelled_at'}) {
- Log (undef, "Job cancelled at " . $Job->{cancelled_at} .
+ if ($Job->{'state'} ne "Running") {
+ Log (undef, "Job state changed to " . $Job->{'state'} . " at " . $Job->{cancelled_at} .
" by user " . $Job->{cancelled_by_user_uuid});
$main::success = 0;
$main::please_freeze = 1;
commit c8ad3a2a0f27964eff5d5c2d845afab728836473
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 24 13:28:58 2014 -0400
3988: Update crunch-job, arv-run-pipeline-instance and workbench to read/write
"state" field of jobs instead of using running/success (+
cancelled_at/started_at/finished_at) to figure out what state the job is in.
diff --git a/apps/workbench/app/views/jobs/_show_recent.html.erb b/apps/workbench/app/views/jobs/_show_recent.html.erb
index b19b7d9..c823fc5 100644
--- a/apps/workbench/app/views/jobs/_show_recent.html.erb
+++ b/apps/workbench/app/views/jobs/_show_recent.html.erb
@@ -83,7 +83,7 @@
<td>
<% if j.finished_at.is_a? Time %>
<%= raw('ran ' + distance_of_time_in_words(j.finished_at, j.started_at).sub('about ','~').sub(' ',' ')) %>
- <% elsif j.running %>
+ <% elsif j.state == "Running" %>
<span class="badge badge-success" title="tasks finished">✔ <%= j.tasks_summary[:done] %></span>
<span class="badge badge-info" title="tasks running">✈ <%= j.tasks_summary[:running] %></span>
<span class="badge" title="tasks todo">✉ <%= j.tasks_summary[:todo] %></span>
diff --git a/apps/workbench/app/views/jobs/show.html.erb b/apps/workbench/app/views/jobs/show.html.erb
index d3047ce..276aec5 100644
--- a/apps/workbench/app/views/jobs/show.html.erb
+++ b/apps/workbench/app/views/jobs/show.html.erb
@@ -1,5 +1,5 @@
<% content_for :tab_line_buttons do %>
- <% if @object.running %>
+ <% if @object.state == "Running" %>
<%= form_tag "/jobs/#{@object.uuid}/cancel", style: "display:inline; padding-left: 1em" do |f| %>
<%= button_tag "Cancel running job", {class: 'btn btn-sm btn-danger', id: "cancel-job-button"} %>
<% end %>
diff --git a/apps/workbench/app/views/users/_tables.html.erb b/apps/workbench/app/views/users/_tables.html.erb
index ebb5201..acde5ce 100644
--- a/apps/workbench/app/views/users/_tables.html.erb
+++ b/apps/workbench/app/views/users/_tables.html.erb
@@ -59,7 +59,7 @@
<td>
<small>
- <% if j.success and j.output %>
+ <% if j.state == "Complete" and j.output %>
<a href="<%= collection_path(j.output) %>">
<% collections = collections_for_object(j.output) %>
<% if collections && !collections.empty? %>
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index 472c20b..bc87c5d 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -509,7 +509,7 @@ class WhRunPipelineInstance
# the job's current state")
c_already_finished = (c[:job] &&
c[:job][:uuid] &&
- !c[:job][:success].nil?)
+ ["Complete", "Failed", "Cancelled"].include? c[:job][:state])
if !c[:job] and
c[:script_parameters].select { |pname, p| p.is_a? Hash and p[:output_of]}.empty?
# No job yet associated with this component and is component inputs
@@ -526,6 +526,7 @@ class WhRunPipelineInstance
:owner_uuid => owner_uuid,
:is_locked_by_uuid => (@options[:run_jobs_here] ? owner_uuid : nil),
:submit_id => my_submit_id,
+ :state => (if @options[:run_jobs_here] then "Running" else "Queued")
}, {
# This is the right place to put these attributes when
# dealing with new API servers.
@@ -546,7 +547,7 @@ class WhRunPipelineInstance
end
end
- if c[:job] and c[:run_in_process] and c[:job][:success].nil?
+ if c[:job] and c[:run_in_process] and not ["Complete", "Failed", "Cancelled"].include? c[:job][:state]
report_status
begin
require 'open3'
@@ -575,21 +576,18 @@ class WhRunPipelineInstance
debuglog "Interrupted (#{e}). Failing job.", 0
$arv.job.update(uuid: c[:job][:uuid],
job: {
- finished_at: Time.now,
- running: false,
- success: false
+ state: "Failed"
})
end
end
if c[:job] and c[:job][:uuid]
- if (c[:job][:running] or
- not (c[:job][:finished_at] or c[:job][:cancelled_at]))
+ if c[:job][:state] == "Running"
# Job is running so update copy of job record
c[:job] = JobCache.get(c[:job][:uuid])
end
- if c[:job][:success]
+ if c[:job][:state] == "Complete"
# Populate script_parameters of other components waiting for
# this job
@components.each do |c2name, c2|
@@ -654,8 +652,7 @@ class WhRunPipelineInstance
end
end
end
- elsif c[:job][:running] ||
- (!c[:job][:started_at] && !c[:job][:cancelled_at])
+ elsif c[:job][:state] == "Running"
# Job is still running
moretodo = true
elsif c[:job][:cancelled_at]
@@ -686,21 +683,17 @@ class WhRunPipelineInstance
end
end
- ended = 0
- succeeded = 0
- failed = 0
- @components.each do |cname, c|
- if c[:job]
- if c[:job][:finished_at] or c[:job][:cancelled_at] or (c[:job][:running] == false and c[:job][:success] == false)
- ended += 1
- if c[:job][:success] == true
- succeeded += 1
- elsif c[:job][:success] == false or c[:job][:cancelled_at]
- failed += 1
- end
- end
- end
- end
+ ended = @components.map { |cname, c|
+ if c[:job] and ["Complete", "Failed", "Cancelled"].include? c[:job][:state] then 1 else 0 end
+ }.reduce(:+) || 0
+
+ succeeded = @components.map { |cname, c|
+ if c[:job] and ["Complete"].include? c[:job][:state] then 1 else 0 end
+ }.reduce(:+) || 0
+
+ failed = @components.map { |cname, c|
+ if c[:job] and ["Failed", "Cancelled"].include? c[:job][:state] then 1 else 0 end
+ }.reduce(:+) || 0
success = (succeeded == @components.length)
@@ -766,19 +759,15 @@ class WhRunPipelineInstance
@components.each do |cname, c|
jstatus = if !c[:job]
"-"
- elsif c[:job][:running]
+ elsif c[:job][:state] == "Running"
"#{c[:job][:tasks_summary].inspect}"
- elsif c[:job][:success]
+ elsif c[:job][:state] == "Complete"
c[:job][:output]
- elsif c[:job][:cancelled_at]
+ elsif c[:job][:state] == "Cancelled"
"cancelled #{c[:job][:cancelled_at]}"
- elsif c[:job][:finished_at]
+ elsif c[:job][:state] == "Failed"
"failed #{c[:job][:finished_at]}"
- elsif c[:job][:started_at]
- "started #{c[:job][:started_at]}"
- elsif c[:job][:is_locked_by_uuid]
- "starting #{c[:job][:started_at]}"
- else
+ elsif c[:job][:state] == "Queued"
"queued #{c[:job][:created_at]}"
end
f.puts "#{cname.to_s.ljust namewidth} #{c[:job] ? c[:job][:uuid] : '-'.ljust(27)} #{jstatus}"
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 70f379e..3dd1627 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -161,6 +161,10 @@ if ($job_has_uuid)
Log(undef, "Job is locked by " . $Job->{'is_locked_by_uuid'});
exit EX_TEMPFAIL;
}
+ if ($Job->{'state'} ne 'Queued') {
+ Log(undef, "Job state is " . $Job->{'state'} . ", but I can only start queued jobs.");
+ exit EX_TEMPFAIL;
+ }
if ($Job->{'success'} ne undef) {
Log(undef, "Job 'success' flag (" . $Job->{'success'} . ") is not null");
exit EX_TEMPFAIL;
@@ -287,9 +291,7 @@ if ($job_has_uuid)
Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
exit EX_TEMPFAIL;
}
- $Job->update_attributes('started_at' => scalar gmtime,
- 'running' => 1,
- 'success' => undef,
+ $Job->update_attributes('state' => 'Running',
'tasks_summary' => { 'failed' => 0,
'todo' => 1,
'running' => 0,
@@ -876,12 +878,14 @@ Log (undef, "finish");
save_meta();
if ($job_has_uuid) {
- $Job->update_attributes('running' => 0,
- 'success' => $collated_output && $main::success,
- 'finished_at' => scalar gmtime)
+ if ($collated_output && $main::success) {
+ $Job->update_attributes('state' => 'Complete')
+ } else {
+ $Job->update_attributes('state' => 'Failed')
+ }
}
-exit ($Job->{'success'} ? 1 : 0);
+exit ($Job->{'state'} != 'Complete' ? 1 : 0);
@@ -1336,9 +1340,12 @@ sub croak
sub cleanup
{
return if !$job_has_uuid;
- $Job->update_attributes('running' => 0,
- 'success' => 0,
- 'finished_at' => scalar gmtime);
+ if ($Job->{'cancelled_at'}) {
+ $Job->update_attributes('state' => 'Cancelled',
+ 'finished_at' => scalar gmtime);
+ } else {
+ $Job->update_attributes('state' => 'Failed');
+ }
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list