[ARVADOS] updated: 7f83b8600e45216c2de47cad06b7eaff4ad5c9cd
git at public.curoverse.com
git at public.curoverse.com
Tue Dec 2 15:35:31 EST 2014
Summary of changes:
sdk/cli/bin/crunch-job | 51 ++++++++++++++++++++++++--------------------------
1 file changed, 24 insertions(+), 27 deletions(-)
via 7f83b8600e45216c2de47cad06b7eaff4ad5c9cd (commit)
from dc706977b754f7b5b0e5835ef80a2b59bc1773f8 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 7f83b8600e45216c2de47cad06b7eaff4ad5c9cd
Author: Tim Pierce <twp at curoverse.com>
Date: Tue Dec 2 15:32:52 2014 -0500
4621: collate_output pipes to python
Rewrote collate_output as create_output_collection, writing its output
data to a Python subprocess that invokes
arvados.api().collections().create(). Writing very large collection
manifests in-process makes Arvados.pm consume inordinate amounts of
memory.
Refs #4621.
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 0265a48..10069fb 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -823,7 +823,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
{
$main::please_info = 0;
freeze();
- collate_output();
+ create_output_collection();
save_meta(1);
update_progress_stats();
}
@@ -885,7 +885,7 @@ while (%proc)
$main::please_continue = 0;
goto THISROUND;
}
- $main::please_info = 0, freeze(), collate_output(), save_meta(1) if $main::please_info;
+ $main::please_info = 0, freeze(), create_output_collection(), save_meta(1) if $main::please_info;
readfrompipes ();
if (!reapchildren())
{
@@ -922,28 +922,14 @@ goto ONELEVEL if !defined $main::success;
release_allocation();
freeze();
-my $collated_output = &collate_output();
+my $collated_output = &create_output_collection();
if (!$collated_output) {
- Log(undef, "output undef");
+ Log (undef, "Failed to write output collection");
}
else {
- eval {
- open(my $orig_manifest, '-|', 'arv-get', $collated_output)
- or die "failed to get collated manifest: $!";
- my $orig_manifest_text = '';
- while (my $manifest_line = <$orig_manifest>) {
- $orig_manifest_text .= $manifest_line;
- }
- my $output = api_call("collections/create", collection => {
- 'manifest_text' => $orig_manifest_text});
- Log(undef, "output uuid " . $output->{uuid});
- Log(undef, "output hash " . $output->{portable_data_hash});
- $Job->update_attributes('output' => $output->{portable_data_hash});
- };
- if ($@) {
- Log (undef, "Failed to register output manifest: $@");
- }
+ Log(undef, "output hash " . $collated_output);
+ $Job->update_attributes('output' => $collated_output);
}
Log (undef, "finish");
@@ -1275,13 +1261,24 @@ sub fetch_block
return $output_block;
}
-sub collate_output
+# create_output_collection generates a new collection containing the
+# output of each successfully completed task, and returns the
+# portable_data_hash for the new collection.
+#
+sub create_output_collection
{
Log (undef, "collate");
my ($child_out, $child_in);
- my $pid = open2($child_out, $child_in, 'arv-put', '--raw',
- '--retries', retry_count());
+ my $pid = open2($child_out, $child_in, 'python', '-c',
+ 'import arvados; ' .
+ 'import sys; ' .
+ 'print arvados.api()' .
+ '.collections()' .
+ '.create(body={"manifest_text":sys.stdin.read()})' .
+ '.execute()["portable_data_hash"]'
+ );
+
my $joboutput;
for (@jobstep)
{
@@ -1318,7 +1315,7 @@ sub collate_output
chomp($joboutput);
# TODO: Ensure exit status == 0.
} else {
- Log (undef, "timed out reading from 'arv-put'");
+ Log (undef, "timed out reading portable_data_hash");
}
}
# TODO: kill $pid instead of waiting, now that we've decided to
@@ -1469,7 +1466,7 @@ sub croak
my $message = "@_ at $file line $line\n";
Log (undef, $message);
freeze() if @jobstep_todo;
- collate_output() if @jobstep_todo;
+ create_output_collection() if @jobstep_todo;
cleanup();
save_meta();
die;
@@ -1521,7 +1518,7 @@ sub freeze_if_want_freeze
}
}
freeze();
- collate_output();
+ create_output_collection();
cleanup();
save_meta();
exit 1;
@@ -1702,7 +1699,7 @@ sub api_call {
if ($next_try_at < time) {
$retry_msg = "Retrying.";
} else {
- my $next_try_fmt = strftime("%Y-%m-%d %H:%M:%S", localtime($next_try_at));
+ my $next_try_fmt = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($next_try_at);
$retry_msg = "Retrying at $next_try_fmt.";
}
Log(undef, "API method $method_name failed: $errmsg. $retry_msg");
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list