[ARVADOS] created: 79deba45d38e64a10105526f4cf2d5fccbc4916a
git at public.curoverse.com
git at public.curoverse.com
Tue Sep 30 21:33:28 EDT 2014
at 79deba45d38e64a10105526f4cf2d5fccbc4916a (commit)
commit 79deba45d38e64a10105526f4cf2d5fccbc4916a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Sep 26 14:53:02 2014 -0400
3859: Add --local-job to explicitly specify whether the job is "local" or not. Fix
the overly clever code that counts successes/failures in
arv-run-pipeline-instance: it called .count on nil when no component was in a given state.
diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index ded7ab1..dbb00bd 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -551,7 +551,7 @@ class WhRunPipelineInstance
report_status
begin
require 'open3'
- Open3.popen3("arv-crunch-job", "--force-unlock",
+ Open3.popen3("arv-crunch-job", "--force-unlock", "--local-job",
"--job", c[:job][:uuid]) do |stdin, stdout, stderr, wait_thr|
debuglog "arv-crunch-job pid #{wait_thr.pid} started", 0
stdin.close
@@ -652,7 +652,7 @@ class WhRunPipelineInstance
end
end
end
- elsif c[:job][:state] == "Running"
+ elsif ["Queued", "Running"].include? c[:job][:state]
# Job is still running
moretodo = true
elsif c[:job][:state] == "Cancelled"
@@ -689,8 +689,8 @@ class WhRunPipelineInstance
c_in_state = @components.values.group_by { |c|
c[:job] and c[:job][:state]
}
- succeeded = c_in_state["Complete"].count
- failed = c_in_state["Failed"].count + c_in_state["Cancelled"].count
+ succeeded = c_in_state["Complete"].andand.count || 0
+ failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0)
ended = succeeded + failed
success = (succeeded == @components.length)
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 2a4675b..c0baa30 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -112,7 +112,9 @@ my $jobspec;
my $job_api_token;
my $no_clear_tmp;
my $resume_stash;
+my $local_job;
GetOptions('force-unlock' => \$force_unlock,
+ 'local-job' => \$local_job,
'git-dir=s' => \$git_dir,
'job=s' => \$jobspec,
'job-api-token=s' => \$job_api_token,
@@ -126,7 +128,9 @@ if (defined $job_api_token) {
my $have_slurm = exists $ENV{SLURM_JOBID} && exists $ENV{SLURM_NODELIST};
my $job_has_uuid = $jobspec =~ /^[-a-z\d]+$/;
-my $local_job = !$job_has_uuid;
+if (!$local_job) {
+ $local_job = !$job_has_uuid;
+}
$SIG{'USR1'} = sub
commit d79ca2ad99aceb528b1033a73035e6961aa25ba7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Sep 26 13:53:06 2014 -0400
3859: Move job locking up earlier in crunch-job. Don't try to lock the job
when running locally.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index fffa9d1..2a4675b 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -153,30 +153,15 @@ if ($job_has_uuid)
{
$Job = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
if (!$force_unlock) {
- # If some other crunch-job process has grabbed this job (or we see
- # other evidence that the job is already underway) we exit
- # EX_TEMPFAIL so crunch-dispatch (our parent process) doesn't
- # mark the job as failed.
- if ($Job->{'is_locked_by_uuid'}) {
- Log(undef, "Job is locked by " . $Job->{'is_locked_by_uuid'});
- exit EX_TEMPFAIL;
- }
- if ($Job->{'state'} ne 'Queued') {
- Log(undef, "Job state is " . $Job->{'state'} . ", but I can only start queued jobs.");
- exit EX_TEMPFAIL;
- }
- if ($Job->{'success'} ne undef) {
- Log(undef, "Job 'success' flag (" . $Job->{'success'} . ") is not null");
- exit EX_TEMPFAIL;
- }
- if ($Job->{'running'}) {
- Log(undef, "Job 'running' flag is already set");
- exit EX_TEMPFAIL;
- }
- if ($Job->{'started_at'}) {
- Log(undef, "Job 'started_at' time is already set (" . $Job->{'started_at'} . ")");
- exit EX_TEMPFAIL;
- }
+ # Claim this job, and make sure nobody else does
+ eval {
+ # lock() sets is_locked_by_uuid and changes state to Running.
+ $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
+ };
+ if ($@) {
+ Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
+ exit EX_TEMPFAIL;
+ };
}
}
else
@@ -190,6 +175,7 @@ else
}
$Job->{'is_locked_by_uuid'} = $User->{'uuid'};
+ $Job->{'state'} = 'Running';
$Job->{'started_at'} = gmtime;
$Job = $arv->{'jobs'}->{'create'}->execute('job' => $Job);
@@ -281,27 +267,11 @@ foreach (@sinfo)
@slot = sort { $a->{cpu} <=> $b->{cpu} } @slot;
-
-my $jobmanager_id;
-if ($job_has_uuid)
-{
- # Claim this job, and make sure nobody else does
- eval {
- $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
- };
- if ($@) {
- Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
- exit EX_TEMPFAIL;
- };
-
- # lock() above sets is_locked_by_uuid and changes state to Running.
- $Job->update_attributes(
- 'tasks_summary' => { 'failed' => 0,
- 'todo' => 1,
- 'running' => 0,
- 'done' => 0 });
-}
-
+$Job->update_attributes(
+ 'tasks_summary' => { 'failed' => 0,
+ 'todo' => 1,
+ 'running' => 0,
+ 'done' => 0 });
Log (undef, "start");
$SIG{'INT'} = sub { $main::please_freeze = 1; };
commit 73e06d599452789b1a1df1a8f9379e4c929b833b
Merge: 1bae4ec 1b189a0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri Sep 26 11:03:15 2014 -0400
Merge branch 'master' into 3859-crunch-job-use-lock
Conflicts:
sdk/cli/bin/crunch-job
diff --cc sdk/cli/bin/crunch-job
index c04e2df,f56099d..fffa9d1
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@@ -282,20 -286,16 +286,20 @@@ my $jobmanager_id
if ($job_has_uuid)
{
# Claim this job, and make sure nobody else does
- unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
- $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
- Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
- exit EX_TEMPFAIL;
- }
- $Job->update_attributes('state' => 'Running',
- 'tasks_summary' => { 'failed' => 0,
- 'todo' => 1,
- 'running' => 0,
- 'done' => 0 });
+ eval {
+ $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
+ };
+ if ($@) {
- Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
++ Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
+ exit EX_TEMPFAIL;
+ };
+
+ # lock() above sets is_locked_by_uuid and changes state to Running.
+ $Job->update_attributes(
+ 'tasks_summary' => { 'failed' => 0,
+ 'todo' => 1,
+ 'running' => 0,
+ 'done' => 0 });
}
commit 1bae4ec29e4181bfb41f1bf828f4764758e0026f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 25 14:16:55 2014 -0400
3859: Fix checking for errors in eval{}
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 4fe12c9..c04e2df 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -282,7 +282,10 @@ my $jobmanager_id;
if ($job_has_uuid)
{
# Claim this job, and make sure nobody else does
- if (eval {$arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})}) {
+ eval {
+ $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
+ };
+ if ($@) {
Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
exit EX_TEMPFAIL;
};
commit 1f4000e57a6c0e82ed2d7de311f0137833006120
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 25 14:09:40 2014 -0400
3859: Use eval {} to catch error when locking the job.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 6f5a699..4fe12c9 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -282,7 +282,7 @@ my $jobmanager_id;
if ($job_has_uuid)
{
# Claim this job, and make sure nobody else does
- unless ($arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})) {
+ if (eval {$arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})}) {
Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
exit EX_TEMPFAIL;
};
commit 375c4ecb4d0b2c4d71df83f2d858d1ed3e78fd5d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Thu Sep 25 09:34:46 2014 -0400
3859: crunch-job: use the apiserver Job lock method instead of the racy is_locked_by_uuid update.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 70f379e..6f5a699 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -282,18 +282,17 @@ my $jobmanager_id;
if ($job_has_uuid)
{
# Claim this job, and make sure nobody else does
- unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
- $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
+ unless ($arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})) {
Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
- exit EX_TEMPFAIL;
- }
- $Job->update_attributes('started_at' => scalar gmtime,
- 'running' => 1,
- 'success' => undef,
- 'tasks_summary' => { 'failed' => 0,
- 'todo' => 1,
- 'running' => 0,
- 'done' => 0 });
+ exit EX_TEMPFAIL;
+ };
+
+ # lock() above sets is_locked_by_uuid and changes state to Running.
+ $Job->update_attributes(
+ 'tasks_summary' => { 'failed' => 0,
+ 'todo' => 1,
+ 'running' => 0,
+ 'done' => 0 });
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list