my $cleanpid = fork();
if ($cleanpid == 0)
{
+ # Find FUSE mounts that look like Keep mounts (the mount path has the
+ # word "keep") and unmount them. Then clean up work directories.
+ # TODO: When #5036 is done and widely deployed, we can get rid of the
+ # regular expression and just unmount everything with type fuse.keep.
srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
- ['bash', '-c', 'if mount | grep -q $JOB_WORK/; then for i in $JOB_WORK/*keep $CRUNCH_TMP/task/*.keep; do /bin/fusermount -z -u $i; done; fi; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src*']);
+ ['bash', '-ec', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src*']);
exit (1);
}
while (1)
return $output_block;
}
-# create_output_collections generates a new collection containing the
-# output of each successfully completed task, and returns the
-# portable_data_hash for the new collection.
-#
+# Create a collection by concatenating the output of all tasks (each
+# task's output is either a manifest fragment, a locator for a
+# manifest fragment stored in Keep, or nothing at all). Return the
+# portable_data_hash of the new collection.
sub create_output_collection
{
Log (undef, "collate");
'.execute()["portable_data_hash"]'
);
+ my $task_idx = -1;
for (@jobstep)
{
- next if (!exists $_->{'arvados_task'}->{'output'} ||
- !$_->{'arvados_task'}->{'success'});
+ ++$task_idx;
+ next unless exists $_->{'arvados_task'}->{'output'};
my $output = $_->{'arvados_task'}->{output};
if ($output !~ /^[0-9a-f]{32}(\+\S+)*$/)
{
}
else
{
- Log (undef, "XXX fetch_block($output) failed XXX");
+ my $uuid = $_->{'arvados_task'}->{'uuid'};
+ Log (undef, "Error retrieving '$output' output by task $task_idx ($uuid)");
$main::success = 0;
}
}
unlink "$destdir.commit";
mkdir $destdir;
-open TARX, "|-", "tar", "-xC", $destdir;
-{
- local $/ = undef;
- print TARX <DATA>;
+
+if (!open(TARX, "|-", "tar", "-xC", $destdir)) {
+ die "Error launching 'tar -xC $destdir': $!";
+}
+# If we send too much data to tar in one write (> 4-5 MiB), it stops, and we
+# get SIGPIPE. We must feed it data incrementally.
+my $tar_input;
+while (read(DATA, $tar_input, 65536)) {
+ print TARX $tar_input;
}
if(!close(TARX)) {
die "'tar -xC $destdir' exited $?: $!";