[ARVADOS] created: 882b71e217a39d77e9c34b9fcba59d7fd85b51d4
git at public.curoverse.com
git at public.curoverse.com
Thu Nov 26 21:01:21 EST 2015
at 882b71e217a39d77e9c34b9fcba59d7fd85b51d4 (commit)
commit 882b71e217a39d77e9c34b9fcba59d7fd85b51d4
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Nov 26 21:11:16 2015 -0500
7751: Add convenience class for staging task output in $TASK_KEEPMOUNT_TMP.
diff --git a/crunch_scripts/test/task_output_dir b/crunch_scripts/test/task_output_dir
new file mode 100755
index 0000000..b177892
--- /dev/null
+++ b/crunch_scripts/test/task_output_dir
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+import arvados
+import arvados.crunch
+import hashlib
+import os
+
+out = arvados.crunch.TaskOutputDir()
+
+string = open(__file__).read()
+with open(os.path.join(out.path, 'example.out'), 'w') as f:
+ f.write(string)
+with open(os.path.join(out.path, 'example.out.SHA1'), 'w') as f:
+ f.write(hashlib.sha1(string).hexdigest() + "\n")
+
+arvados.current_task().set_output(out.manifest_text())
diff --git a/sdk/python/arvados/crunch.py b/sdk/python/arvados/crunch.py
new file mode 100644
index 0000000..c184e6a
--- /dev/null
+++ b/sdk/python/arvados/crunch.py
@@ -0,0 +1,27 @@
+import json
+import os
+
+class TaskOutputDir(object):
+ """Keep-backed directory for staging outputs of Crunch tasks.
+
+ Example, in a crunch task whose output is a file called "out.txt"
+ containing "42":
+
+ import arvados
+ import arvados.crunch
+ import os
+
+ out = arvados.crunch.TaskOutputDir()
+ with open(os.path.join(out.path, 'out.txt'), 'w') as f:
+ f.write('42')
+ arvados.current_task().set_output(out.manifest_text())
+ """
+ def __init__(self):
+ self.path = os.environ['TASK_KEEPMOUNT_TMP']
+
+ def __str__(self):
+ return self.path
+
+ def manifest_text(self):
+ snapshot = os.path.join(self.path, '.arvados#collection')
+ return json.load(open(snapshot))['manifest_text']
diff --git a/sdk/python/tests/test_crunch.py b/sdk/python/tests/test_crunch.py
new file mode 100644
index 0000000..431390b
--- /dev/null
+++ b/sdk/python/tests/test_crunch.py
@@ -0,0 +1,27 @@
+import arvados.crunch
+import os
+import shutil
+import tempfile
+import unittest
+
+class TaskOutputDirTest(unittest.TestCase):
+ def setUp(self):
+ self.tmp = tempfile.mkdtemp()
+ os.environ['TASK_KEEPMOUNT_TMP'] = self.tmp
+
+ def tearDown(self):
+ os.environ.pop('TASK_KEEPMOUNT_TMP')
+ shutil.rmtree(self.tmp)
+
+ def test_env_var(self):
+ out = arvados.crunch.TaskOutputDir()
+ self.assertEqual(out.path, self.tmp)
+
+ with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+ f.write('{\n "manifest_text":"",\n "uuid":null\n}\n')
+ self.assertEqual(out.manifest_text(), '')
+
+ # Special file must be re-read on each call to manifest_text().
+ with open(os.path.join(self.tmp, '.arvados#collection'), 'w') as f:
+ f.write(r'{"manifest_text":". unparsed 0:3:foo\n","uuid":null}')
+ self.assertEqual(out.manifest_text(), ". unparsed 0:3:foo\n")
commit 326f9744f60d14566aa38a89f1e3e2999de9ab7c
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Nov 26 20:52:10 2015 -0500
7751: Set up an arv-mount scratch directory for each task, and put its path in TASK_KEEPMOUNT_TMP.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 5177434..53695e4 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -870,11 +870,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
$ENV{"TASK_SLOT_NUMBER"} = $slot[$childslot]->{cpu};
$ENV{"TASK_WORK"} = $ENV{"CRUNCH_TMP"}."/task/$childslotname";
$ENV{"HOME"} = $ENV{"TASK_WORK"};
- $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
$ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
$ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
$ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
+ my $keep_mnt = $ENV{"TASK_WORK"}.".keep";
+
$ENV{"GZIP"} = "-n";
my @srunargs = (
@@ -887,16 +888,20 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
my $stdbuf = " stdbuf --output=0 --error=0 ";
my $command =
- "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
- ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} "
- ."&& cd $ENV{CRUNCH_TMP} "
+ "if [ -e \Q$ENV{TASK_WORK}\E ]; then rm -rf \Q$ENV{TASK_WORK}\E; fi; "
+ ."mkdir -p \Q$ENV{CRUNCH_TMP}\E \Q$ENV{JOB_WORK}\E \Q$ENV{TASK_WORK}\E \Q$keep_mnt\E "
+ ."&& cd \Q$ENV{CRUNCH_TMP}\E "
# These environment variables get used explicitly later in
# $command. No tool is expected to read these values directly.
.q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
.q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
- $command .= "&& exec arv-mount --by-pdh --crunchstat-interval=10 --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
+
+ $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other \Q$keep_mnt\E --exec ";
+ $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
+ $ENV{TASK_KEEPMOUNT_TMP} = "$keep_mnt/tmp";
+
if ($docker_hash)
{
my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
@@ -917,14 +922,18 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
$command .= "--volume=\Q$ENV{CRUNCH_SRC}:$ENV{CRUNCH_SRC}:ro\E ";
$command .= "--volume=\Q$ENV{CRUNCH_INSTALL}:$ENV{CRUNCH_INSTALL}:ro\E ";
- # Currently, we make arv-mount's mount point appear at /keep
- # inside the container (instead of using the same path as the
- # host like we do with CRUNCH_SRC and CRUNCH_INSTALL). However,
- # crunch scripts and utilities must not rely on this. They must
- # use $TASK_KEEPMOUNT.
+ # Currently, we make the "by_pdh" directory in arv-mount's mount
+ # point appear at /keep inside the container (instead of using
+ # the same path as the host like we do with CRUNCH_SRC and
+ # CRUNCH_INSTALL). However, crunch scripts and utilities must
+ # not rely on this. They must use $TASK_KEEPMOUNT.
$command .= "--volume=\Q$ENV{TASK_KEEPMOUNT}:/keep:ro\E ";
$ENV{TASK_KEEPMOUNT} = "/keep";
+ # Ditto TASK_KEEPMOUNT_TMP, as /keep_tmp.
+ $command .= "--volume=\Q$ENV{TASK_KEEPMOUNT_TMP}:/keep_tmp:ro\E ";
+ $ENV{TASK_KEEPMOUNT_TMP} = "/keep_tmp";
+
# TASK_WORK is almost exactly like a docker data volume: it
# starts out empty, is writable, and persists until no
# containers use it any more. We don't use --volumes-from to
commit 5cb661ba5347e7c78c09d49a29e78907afd45896
Merge: 76e2a1f 86ac7a7
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Nov 26 14:32:32 2015 -0500
Merge branch '7851-available-port' refs #7851
commit 86ac7a774a81b1e081dc68e44826ad0615e3c3ed
Author: Tom Clegg <tom at curoverse.com>
Date: Thu Nov 26 13:50:32 2015 -0500
7851: Choose available ports for selenium and poltergeist.
Also, display the chosen port numbers at startup.
Without an explicit port, the Selenium driver defaults to 7055, which
causes interference between concurrent test runs.
diff --git a/apps/workbench/test/integration_helper.rb b/apps/workbench/test/integration_helper.rb
index 207ddd1..a59ac87 100644
--- a/apps/workbench/test/integration_helper.rb
+++ b/apps/workbench/test/integration_helper.rb
@@ -4,21 +4,41 @@ require 'capybara/poltergeist'
require 'uri'
require 'yaml'
+def available_port for_what
+ Addrinfo.tcp("0.0.0.0", 0).listen do |srv|
+ port = srv.connect_address.ip_port
+ STDERR.puts "Using port #{port} for #{for_what}"
+ return port
+ end
+end
+
+SELENIUM_OPTS = {
+ port: available_port('selenium'),
+}
+
POLTERGEIST_OPTS = {
- window_size: [1200, 800],
phantomjs_options: ['--ignore-ssl-errors=true'],
- inspector: true,
+ port: available_port('poltergeist'),
+ window_size: [1200, 800],
}
Capybara.register_driver :poltergeist do |app|
Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS
end
+Capybara.register_driver :poltergeist_debug do |app|
+ Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(inspector: true)
+end
+
Capybara.register_driver :poltergeist_without_file_api do |app|
js = File.expand_path '../support/remove_file_api.js', __FILE__
Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(extensions: [js])
end
+Capybara.register_driver :selenium do |app|
+ Capybara::Selenium::Driver.new app, SELENIUM_OPTS
+end
+
Capybara.register_driver :selenium_with_download do |app|
profile = Selenium::WebDriver::Firefox::Profile.new
profile['browser.download.dir'] = DownloadHelper.path.to_s
@@ -28,7 +48,7 @@ Capybara.register_driver :selenium_with_download do |app|
profile['browser.download.manager.showWhenStarting'] = false
profile['browser.helperApps.alwaysAsk.force'] = false
profile['browser.helperApps.neverAsk.saveToDisk'] = 'text/plain,application/octet-stream'
- Capybara::Selenium::Driver.new app, profile: profile
+ Capybara::Selenium::Driver.new app, SELENIUM_OPTS.merge(profile: profile)
end
module WaitForAjax
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list