+ croak("Installing Docker image from $docker_locator exited "
+ .exit_status_s($?));
+ }
+
+ if ($Job->{arvados_sdk_version}) {
+ # The job also specifies an Arvados SDK version. Add the SDKs to the
+ # tar file for the build script to install.
+ Log(undef, sprintf("Packing Arvados SDK version %s for installation",
+ $Job->{arvados_sdk_version}));
+ add_git_archive("git", "--git-dir=$git_dir", "archive",
+ "--prefix=.arvados.sdk/",
+ $Job->{arvados_sdk_version}, "sdk");
+ }
+}
+
+if (!defined($git_dir) && $Job->{'script_version'} =~ m{^/}) {
+  # No --git-dir argument was given (which implies we were not invoked
+  # by crunch-dispatch) *and* script_version looks like an absolute
+  # path: use that path as a ready-made working directory and leave
+  # git out of it entirely.
+  for my $env_name ("CRUNCH_SRC_COMMIT", "CRUNCH_SRC") {
+    $ENV{$env_name} = $Job->{'script_version'};
+  }
+}
+else {
+  # script_version has to be turned into a full git commit sha1. A
+  # remote repository is first mirrored into our local filesystem so
+  # that "git archive" works and symbolic versions such as "master^"
+  # resolve reliably.
+  $ENV{"CRUNCH_SRC"} = "$ENV{CRUNCH_TMP}/src";
+
+  Log(undef,
+      "Looking for version $Job->{script_version} from repository $Job->{repository}");
+
+  $ENV{"CRUNCH_SRC_COMMIT"} = $Job->{script_version};
+
+  # Where the source lives. Under crunch-dispatch, the source tree has
+  # already been pulled into its own repository and that repository's
+  # path arrives as $git_dir; a "local" job may instead name something
+  # we have to fetch. URLs are accepted in --git-dir as well, even
+  # though crunch-dispatch currently passes a local path.
+  my $source = $git_dir || $Job->{'repository'};
+
+  # Either 'remote' (must be fetched into a local directory before
+  # `git log` et al. can be used) or 'local'.
+  my $source_kind;
+
+  if ($source =~ m{://|^[^/]*:}) {
+    # Clonable git URL: git://, https://, file:///, or
+    # [user@]host:repo.git. "user/name@host:foo" is deliberately not
+    # recognized, because telling it apart from a local path is too
+    # fragile; use the ssh:// form for anything unusual.
+    $source_kind = 'remote';
+  }
+  elsif ($source =~ m{^\.*/}) {
+    # Local path to a git index. A relative path must be written as
+    # ./ or ../ so it cannot be mistaken for the name of a hosted
+    # repository. A work tree such as ../foo is mapped to ../foo/.git
+    # when that directory exists.
+    $source .= "/.git" if -d "$source/.git";
+    $source_kind = 'local';
+  }
+  else {
+    # None of the above, so this must be the name of a hosted
+    # repository: look up its fetch URL through the API.
+    my $listing = api_call("repositories/list",
+                           'filters' => [['name', '=', $source]]);
+    my @candidates = @{$listing->{'items'}};
+    my $available = $listing->{'serverResponse'}->{'items_available'};
+    if ($available > 0) {
+      my $uuids = join(", ", map { $_->{'uuid'} } @candidates);
+      Log(undef, "Repository '$source' -> $uuids");
+    }
+    # Exactly one match is required; zero or several is an error.
+    croak("Error: Found $available repositories with name '$source'.")
+        if $available != 1;
+    $source = $candidates[0]->{'fetch_url'};
+    $source_kind = 'remote';
+  }
+  Log(undef, "Using $source_kind repository '$source'");
+  $ENV{"CRUNCH_SRC_URL"} = $source;
+
+  # $treeish is the version as requested; $commit will hold the full
+  # sha1 it resolves to.
+  my $treeish = $Job->{'script_version'};
+  my $commit;
+  if ($source_kind eq 'remote') {
+    # One bare repository, CRUNCH_TMP/.git, is re-used by multiple
+    # crunch-jobs and simply accumulates fetched objects, which keeps
+    # excess object-fetching to a minimum.
+    my $cache_dir = "$ENV{CRUNCH_TMP}/.git";
+    my $cache_git = "git --git-dir=\Q$cache_dir\E";
+
+    # Prepare the cache repository. "git init --bare" is safe to
+    # repeat: on an already initialized repository it exits 0 and does
+    # not delete fetched objects.
+    unless (-d $cache_dir) {
+      make_path($cache_dir) or croak("Error: could not create $cache_dir");
+    }
+    qx{$cache_git init --bare};
+    croak("Error: $cache_git init --bare exited ".exit_status_s($?)) if $?;
+
+    # A hash (or abbreviated hash) can be looked up in the cache first,
+    # which is cheaper than fetching. Tags and branches change over
+    # time, so those are always left for the remote to resolve.
+    if ($treeish =~ /^[0-9a-f]{7,40}$/s) {
+      # Failure is normal here, hence stderr is discarded.
+      my $cached = qx{$cache_git rev-list -n1 ''\Q$treeish\E 2>/dev/null};
+      # The prefix test keeps a branch that happens to be named
+      # abcdeff from being accepted as commit 1234567. The capturing
+      # match comes last so that $1 holds the full sha1.
+      my $usable = ($? == 0
+                    && $cached =~ /^$treeish/
+                    && $cached =~ /^([0-9a-f]{40})$/s);
+      if ($usable) {
+        $commit = $1;
+        Log(undef, "Commit $commit already present in $cache_dir");
+      }
+    }
+
+    unless (defined $commit) {
+      # Either $treeish is not a plain hash or it is not cached yet:
+      # the remote has to be fetched to resolve it correctly.
+      #
+      # All local heads are removed first, so that a name missing from
+      # this remote cannot resolve to (or collide with) a branch or
+      # tag fetched earlier, possibly from a different remote.
+      remove_tree("$cache_dir/refs/heads", {keep_root => 1});
+
+      Log(undef, "Fetching objects from $source to $cache_dir");
+      qx{$cache_git fetch --no-progress --tags ''\Q$source\E \Q+refs/heads/*:refs/heads/*\E};
+      croak("Error: `$cache_git fetch` exited ".exit_status_s($?)) if $?;
+    }
+
+    # Everything needed is now in the cache, so the remaining git
+    # commands run against it.
+    $source = $cache_dir;
+  }
+
+  # Final resolution of $treeish against the (now local) repository.
+  my $git = "git --git-dir=\Q$source\E";
+  my $rev_list_out = qx{$git rev-list -n1 ''\Q$treeish\E};
+  my $resolved = ($? == 0 && $rev_list_out =~ /^([0-9a-f]{40})$/);
+  if (!$resolved) {
+    croak("`$git rev-list` exited "
+          .exit_status_s($?)
+          .", '$treeish' not found. Giving up.");
+  }
+  $commit = $1;
+  Log(undef, "Version $treeish is commit $commit");
+
+  # Record the real commit id in the database, frozentokey, logs,
+  # etc., rather than an abbreviation or branch name that can become
+  # ambiguous or point to a different commit in the future.
+  if ($commit ne $Job->{'script_version'}) {
+    $Job->update_attributes('script_version' => $commit)
+        or croak("Error: failed to update job's script_version attribute");
+  }
+
+  $ENV{"CRUNCH_SRC_COMMIT"} = $commit;
+  add_git_archive("$git archive ''\Q$commit\E");
+}
+
+my $git_archive = combined_git_archive();
+if (!defined $git_archive) {
+ Log(undef, "Skip install phase (no git archive)");
+ if ($have_slurm) {
+ Log(undef, "Warning: This probably means workers have no source tree!");
+ }
+}
+else {
+ Log(undef, "Run install script on all workers");
+
+ my @srunargs = ("srun",
+ "--nodelist=$nodelist",
+ "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
+ my @execargs = ("sh", "-c",
+ "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
+
+ my $installpid = fork();
+ if ($installpid == 0)
+ {
+ srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
+ exit (1);
+ }
+ while (1)
+ {
+ last if $installpid == waitpid (-1, WNOHANG);
+ freeze_if_want_freeze ($installpid);
+ select (undef, undef, undef, 0.1);