[ARVADOS] created: 807e72e0c7c4a023f3b2e580c9dbcdfd025106a3

git at public.curoverse.com git at public.curoverse.com
Fri Apr 3 15:39:37 EDT 2015


        at  807e72e0c7c4a023f3b2e580c9dbcdfd025106a3 (commit)


commit 807e72e0c7c4a023f3b2e580c9dbcdfd025106a3
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Apr 3 13:55:11 2015 -0400

    5642: crunch-job uses higher memory limit when running few tasks.
    
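    The per-task memory limit is now computed by dividing 95% of the
    node's RAM by the number of tasks still to do at the current level
    whenever that number is smaller than CRUNCH_NODE_SLOTS, instead of
    always dividing by CRUNCH_NODE_SLOTS.  In particular, this gives a
    task all of the node's RAM when it's the only task running this
    round, which is more in line with user expectations.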

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 1f29a28..ebca7a5 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -653,6 +653,10 @@ update_progress_stats();
 
 
 THISROUND:
+my $tasks_this_level = 0;
+foreach my $id (@jobstep_todo) {
+  $tasks_this_level++ if ($jobstep[$id]->{level} == $level);
+}
 for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 {
   my $id = $jobstep_todo[$todo_ptr];
@@ -710,6 +714,11 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 
     $ENV{"GZIP"} = "-n";
 
+    my $max_node_concurrent_tasks = $ENV{CRUNCH_NODE_SLOTS};
+    if ($tasks_this_level < $max_node_concurrent_tasks) {
+      $max_node_concurrent_tasks = $tasks_this_level;
+    }
+
     my @srunargs = (
       "srun",
       "--nodelist=".$childnode->{name},
@@ -724,7 +733,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         # $command.  No tool is expected to read these values directly.
         .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
-        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
+        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($max_node_concurrent_tasks * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP";
     $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
     if ($docker_hash)

commit 8f238ab3e247f91c62a504247408de2ab6f847d4
Author: Brett Smith <brett at curoverse.com>
Date:   Fri Apr 3 13:41:18 2015 -0400

    5642: Explicitly make all swap available under Docker in crunch-job.
    
    Without this, Docker 1.2 through 1.5 send subprocesses SIGKILL if they
    exceed the memory limit.  Refer to #5642 for an example.
    
    --memory-swap requires Docker 1.3+.

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 294696c..1f29a28 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -720,14 +720,18 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 	"if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
         ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} "
 	."&& cd $ENV{CRUNCH_TMP} "
-        ."&& MEM=\$(cat /proc/meminfo | grep MemTotal | sed 's/\\s\\s*/ /g' |cut -d' ' -f2) "
-        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) ";
+        # These environment variables get used explicitly later in
+        # $command.  No tool is expected to read these values directly.
+        .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
+        .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
+        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
+        ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP";
     $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
     if ($docker_hash)
     {
       my $cidfile = "$ENV{CRUNCH_TMP}/$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}.cid";
       $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
-      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy --memory=\${MEMLIMIT}k ";
+      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy --memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
 
       # Dynamically configure the container to use the host system as its
       # DNS server.  Get the host's global addresses from the ip command,

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list