# Find FUSE mounts under $CRUNCH_TMP and unmount them. Then clean
# up work directories crunch_tmp/work, crunch_tmp/opt,
# crunch_tmp/src*.
+# (arv-mount --unmount-all now performs the unmounting itself, so the
+# old mount(8)/awk/fusermount pipeline -- and the #5036 TODO about
+# narrowing mount's -t option -- are no longer needed.)
- #
- # TODO: When #5036 is done and widely deployed, we can limit mount's
- # -t option to simply fuse.keep.
my ($exited, $stdout, $stderr) = srun_sync(
["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
- ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid'],
+ ['bash', '-ec', q{
+arv-mount --unmount-timeout 10 --unmount-all ${CRUNCH_TMP}
+rm -rf ${JOB_WORK} ${CRUNCH_INSTALL} ${CRUNCH_TMP}/task ${CRUNCH_TMP}/src* ${CRUNCH_TMP}/*.cid
+ }],
{label => "clean work dirs"});
if ($exited != 0) {
exit(EX_RETRY_UNLOCKED);
Log (undef, "docker image hash is $docker_hash");
$docker_stream =~ s/^\.//;
my $docker_install_script = qq{
-if $docker_bin images -q --no-trunc --all | grep -xF \Q$docker_hash\E >/dev/null; then
- exit 0
+loaded() {
+ id=\$($docker_bin inspect --format="{{.ID}}" \Q$docker_hash\E) || return 1
+ echo "image ID is \$id"
+ [[ \${id} = \Q$docker_hash\E ]]
+}
+if loaded >&2 2>/dev/null; then
+ echo >&2 "image is already present"
+ exit 0
fi
-declare -a exit_codes=("\${PIPESTATUS[@]}")
-if [ 0 != "\${exit_codes[0]}" ]; then
- exit "\${exit_codes[0]}" # `docker images` failed
-elif [ 1 != "\${exit_codes[1]}" ]; then
- exit "\${exit_codes[1]}" # `grep` encountered an error
-else
- # Everything worked fine, but grep didn't find the image on this host.
- arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
+echo >&2 "docker image is not present; loading"
+arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
+if ! loaded >&2; then
+ # Single-quoted on purpose: inside double quotes bash would treat the
+ # backquotes as command substitution and re-run "docker load" here.
+ echo >&2 '`docker load` exited 0, but image is not found (!)'
+ exit 1
fi
+echo >&2 "image loaded successfully"
};
my ($exited, $stdout, $stderr) = srun_sync(
{
Log (undef, "knob " . $_);
}
+# Look up the nodes assigned to this job and record each one's
+# hostname, UUID, and properties (JSON-encoded) in the job log.
+my $resp = api_call(
+ 'nodes/list',
+ 'filters' => [['hostname', 'in', \@node]],
+ 'order' => 'hostname',
+ 'limit' => scalar(@node),
+ );
+for my $n (@{$resp->{items}}) {
+ Log(undef, "$n->{hostname} $n->{uuid} ".JSON::encode_json($n->{properties}));
+}
my $collated_output = save_output_collection();
Log (undef, "finish");
-save_meta();
+my $final_log = save_meta();
my $final_state;
-if ($collated_output && $main::success) {
+if ($collated_output && $final_log && $main::success) {
$final_state = 'Complete';
} else {
$final_state = 'Failed';
import arvados
import sys
print (arvados.api("v1").collections().
- create(body={"manifest_text": sys.stdin.read()}).
+ create(body={"manifest_text": sys.stdin.read(),
+ "owner_uuid": sys.argv[2]}).
execute(num_retries=int(sys.argv[1]))["portable_data_hash"])
-}, retry_count());
+}, retry_count(), $Job->{owner_uuid});
my $task_idx = -1;
my $manifest_size = 0;
$log_pipe_pid = open2($log_pipe_out, $log_pipe_in,
'arv-put',
'--stream',
- '--retries', '3',
+ '--retries', '6',
'--filename', $logfilename,
'-');
$log_pipe_out_buf = "";
});
Log(undef, "log collection is " . $log_coll->{portable_data_hash});
$Job->update_attributes('log' => $log_coll->{portable_data_hash});
+
+ return $log_coll->{portable_data_hash};
}