[ARVADOS] created: 79deba45d38e64a10105526f4cf2d5fccbc4916a

git at public.curoverse.com git at public.curoverse.com
Tue Sep 30 21:33:28 EDT 2014


        at  79deba45d38e64a10105526f4cf2d5fccbc4916a (commit)


commit 79deba45d38e64a10105526f4cf2d5fccbc4916a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Sep 26 14:53:02 2014 -0400

    3859: Add --local-job to explicitly specify whether job is "local" or not.  Fix
    overly clever code to count number of successes/failures in
    arv-run-pipeline-instance.

diff --git a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance
index ded7ab1..dbb00bd 100755
--- a/sdk/cli/bin/arv-run-pipeline-instance
+++ b/sdk/cli/bin/arv-run-pipeline-instance
@@ -551,7 +551,7 @@ class WhRunPipelineInstance
           report_status
           begin
             require 'open3'
-            Open3.popen3("arv-crunch-job", "--force-unlock",
+            Open3.popen3("arv-crunch-job", "--force-unlock", "--local-job",
                          "--job", c[:job][:uuid]) do |stdin, stdout, stderr, wait_thr|
               debuglog "arv-crunch-job pid #{wait_thr.pid} started", 0
               stdin.close
@@ -652,7 +652,7 @@ class WhRunPipelineInstance
                 end
               end
             end
-          elsif c[:job][:state] == "Running"
+          elsif ["Queued", "Running"].include? c[:job][:state]
             # Job is still running
             moretodo = true
           elsif c[:job][:state] == "Cancelled"
@@ -689,8 +689,8 @@ class WhRunPipelineInstance
     c_in_state = @components.values.group_by { |c| 
       c[:job] and c[:job][:state]
     }
-    succeeded = c_in_state["Complete"].count
-    failed = c_in_state["Failed"].count + c_in_state["Cancelled"].count
+    succeeded = c_in_state["Complete"].andand.count || 0
+    failed = (c_in_state["Failed"].andand.count || 0) + (c_in_state["Cancelled"].andand.count || 0)
     ended = succeeded + failed
 
     success = (succeeded == @components.length)
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 2a4675b..c0baa30 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -112,7 +112,9 @@ my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
+my $local_job;
 GetOptions('force-unlock' => \$force_unlock,
+           'local-job' => \$local_job,
            'git-dir=s' => \$git_dir,
            'job=s' => \$jobspec,
            'job-api-token=s' => \$job_api_token,
@@ -126,7 +128,9 @@ if (defined $job_api_token) {
 
 my $have_slurm = exists $ENV{SLURM_JOBID} && exists $ENV{SLURM_NODELIST};
 my $job_has_uuid = $jobspec =~ /^[-a-z\d]+$/;
-my $local_job = !$job_has_uuid;
+if (!$local_job) {
+  $local_job = !$job_has_uuid;
+}
 
 
 $SIG{'USR1'} = sub

commit d79ca2ad99aceb528b1033a73035e6961aa25ba7
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Sep 26 13:53:06 2014 -0400

    3859: Move job locking up earlier in crunch-job.  Don't try to lock the job
    when running locally.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index fffa9d1..2a4675b 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -153,30 +153,15 @@ if ($job_has_uuid)
 {
   $Job = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
   if (!$force_unlock) {
-    # If some other crunch-job process has grabbed this job (or we see
-    # other evidence that the job is already underway) we exit
-    # EX_TEMPFAIL so crunch-dispatch (our parent process) doesn't
-    # mark the job as failed.
-    if ($Job->{'is_locked_by_uuid'}) {
-      Log(undef, "Job is locked by " . $Job->{'is_locked_by_uuid'});
-      exit EX_TEMPFAIL;
-    }
-    if ($Job->{'state'} ne 'Queued') {
-      Log(undef, "Job state is " . $Job->{'state'} . ", but I can only start queued jobs.");
-      exit EX_TEMPFAIL;
-    }
-    if ($Job->{'success'} ne undef) {
-      Log(undef, "Job 'success' flag (" . $Job->{'success'} . ") is not null");
-      exit EX_TEMPFAIL;
-    }
-    if ($Job->{'running'}) {
-      Log(undef, "Job 'running' flag is already set");
-      exit EX_TEMPFAIL;
-    }
-    if ($Job->{'started_at'}) {
-      Log(undef, "Job 'started_at' time is already set (" . $Job->{'started_at'} . ")");
-      exit EX_TEMPFAIL;
-    }
+    # Claim this job, and make sure nobody else does
+    eval {
+      # lock() sets is_locked_by_uuid and changes state to Running.
+      $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
+    };
+    if ($@) {
+      Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
+      exit EX_TEMPFAIL;    
+    };
   }
 }
 else
@@ -190,6 +175,7 @@ else
   }
 
   $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
+  $Job->{'state'} = 'Running';
   $Job->{'started_at'} = gmtime;
 
   $Job = $arv->{'jobs'}->{'create'}->execute('job' => $Job);
@@ -281,27 +267,11 @@ foreach (@sinfo)
 @slot = sort { $a->{cpu} <=> $b->{cpu} } @slot;
 
 
-
-my $jobmanager_id;
-if ($job_has_uuid)
-{
-  # Claim this job, and make sure nobody else does
-  eval {
-    $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
-  };
-  if ($@) {
-    Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
-    exit EX_TEMPFAIL;    
-  };
-
-  # lock() above sets is_locked_by_uuid and changes state to Running.
-  $Job->update_attributes(
-    'tasks_summary' => { 'failed' => 0,
-                         'todo' => 1,
-                         'running' => 0,
-                         'done' => 0 });
-}
-
+$Job->update_attributes(
+  'tasks_summary' => { 'failed' => 0,
+                       'todo' => 1,
+                       'running' => 0,
+                       'done' => 0 });
 
 Log (undef, "start");
 $SIG{'INT'} = sub { $main::please_freeze = 1; };

commit 73e06d599452789b1a1df1a8f9379e4c929b833b
Merge: 1bae4ec 1b189a0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Sep 26 11:03:15 2014 -0400

    Merge branch 'master' into 3859-crunch-job-use-lock
    
    Conflicts:
    	sdk/cli/bin/crunch-job

diff --cc sdk/cli/bin/crunch-job
index c04e2df,f56099d..fffa9d1
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@@ -282,20 -286,16 +286,20 @@@ my $jobmanager_id
  if ($job_has_uuid)
  {
    # Claim this job, and make sure nobody else does
 -  unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
 -          $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
 -    Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
 -    exit EX_TEMPFAIL;
 -  }
 -  $Job->update_attributes('state' => 'Running',
 -                          'tasks_summary' => { 'failed' => 0,
 -                                               'todo' => 1,
 -                                               'running' => 0,
 -                                               'done' => 0 });
 +  eval {
 +    $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
 +  };
 +  if ($@) {
-     Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
++    Log(undef, "Error while locking job, exiting ".EX_TEMPFAIL);
 +    exit EX_TEMPFAIL;    
 +  };
 +
 +  # lock() above sets is_locked_by_uuid and changes state to Running.
 +  $Job->update_attributes(
 +    'tasks_summary' => { 'failed' => 0,
 +                         'todo' => 1,
 +                         'running' => 0,
 +                         'done' => 0 });
  }
  
  

commit 1bae4ec29e4181bfb41f1bf828f4764758e0026f
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Sep 25 14:16:55 2014 -0400

    3859: Fix checking for errors in eval{}

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 4fe12c9..c04e2df 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -282,7 +282,10 @@ my $jobmanager_id;
 if ($job_has_uuid)
 {
   # Claim this job, and make sure nobody else does
-  if (eval {$arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})}) {
+  eval {
+    $arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})
+  };
+  if ($@) {
     Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
     exit EX_TEMPFAIL;    
   };

commit 1f4000e57a6c0e82ed2d7de311f0137833006120
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Sep 25 14:09:40 2014 -0400

    3859: Use eval {} to catch error when locking the job.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 6f5a699..4fe12c9 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -282,7 +282,7 @@ my $jobmanager_id;
 if ($job_has_uuid)
 {
   # Claim this job, and make sure nobody else does
-  unless ($arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})) {
+  if (eval {$arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})}) {
     Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
     exit EX_TEMPFAIL;    
   };

commit 375c4ecb4d0b2c4d71df83f2d858d1ed3e78fd5d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Sep 25 09:34:46 2014 -0400

    3859: crunch-job use apiserver Job lock method instead of racy is_locked_by.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 70f379e..6f5a699 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -282,18 +282,17 @@ my $jobmanager_id;
 if ($job_has_uuid)
 {
   # Claim this job, and make sure nobody else does
-  unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
-          $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
+  unless ($arv->{'jobs'}->{'lock'}->execute('uuid' => $Job->{'uuid'})) {
     Log(undef, "Error while updating / locking job, exiting ".EX_TEMPFAIL);
-    exit EX_TEMPFAIL;
-  }
-  $Job->update_attributes('started_at' => scalar gmtime,
-                          'running' => 1,
-                          'success' => undef,
-                          'tasks_summary' => { 'failed' => 0,
-                                               'todo' => 1,
-                                               'running' => 0,
-                                               'done' => 0 });
+    exit EX_TEMPFAIL;    
+  };
+
+  # lock() above sets is_locked_by_uuid and changes state to Running.
+  $Job->update_attributes(
+    'tasks_summary' => { 'failed' => 0,
+                         'todo' => 1,
+                         'running' => 0,
+                         'done' => 0 });
 }
 
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list