|cwd|string|Initial working directory, given as an absolute path (in the container) or a path relative to the WORKDIR given in the image's Dockerfile.|Required.|
|command|array of strings|Command to execute in the container.|Required. e.g., @["echo","hello"]@|
|output_path|string|Path to a directory or file inside the container that should be preserved as the container's output when it finishes. This path must be one of the mount targets. For best performance, point output_path to a writable collection mount. See "Pre-populate output using Mount points":#pre-populate-output for details regarding optional output pre-population using mount points and "Symlinks in output":#symlinks-in-output for additional details.|Required.|
+|output_glob|array of strings|Glob patterns determining which files (of those present in the output directory when the container finishes) will be included in the output collection. If multiple patterns are given, files that match any pattern are included. If null or empty, all files will be included.|"Doublestar" patterns are accepted:
+@foo/**@ matches the entire tree rooted at @foo@ in the top level of the output directory.
+@**/fo[og]@ matches all files named @foo@ or @fog@ anywhere in the tree.|
|output_name|string|Desired name for the output collection. If null or empty, a name will be assigned automatically.||
|output_ttl|integer|Desired lifetime for the output collection, in seconds. If zero, the output collection will not be deleted automatically.||
|priority|integer|Range 0-1000. Indicates scheduling order preference.|Clients are expected to submit container requests with zero priority in order to preview the container that will be used to satisfy it. Priority can be null if and only if state!="Committed". See "below for more details":#priority .|
|finished_at|datetime|When this container finished.|Null if container has not yet finished.|
|log|string|Portable data hash of a collection containing the log messages produced when executing the container.|Null if container has not yet started. The Crunch system will periodically update this field for a running container.|
|environment|hash|Environment variables and values that should be set in the container environment (@docker run --env@). This augments and (when conflicts exist) overrides environment variables given in the image's Dockerfile.|Must be equal to a ContainerRequest's environment in order to satisfy the ContainerRequest.|
-|cwd|string|Initial working directory.|Must be equal to a ContainerRequest's cwd in order to satisfy the ContainerRequest|
+|cwd|string|Initial working directory.|Must be equal to a ContainerRequest's cwd in order to satisfy the ContainerRequest.|
|command|array of strings|Command to execute.| Must be equal to a ContainerRequest's command in order to satisfy the ContainerRequest.|
|output_path|string|Path to a directory or file inside the container that should be preserved as this container's output when it finishes.|Must be equal to a ContainerRequest's output_path in order to satisfy the ContainerRequest.|
+|output_glob|array of strings|Glob patterns determining which files will be included in the output collection. See corresponding attribute in the "container_requests resource":container_requests.html.|Must be equal to a ContainerRequest's output_glob in order to satisfy the ContainerRequest.|
|mounts|hash|Must contain the same keys as the ContainerRequest being satisfied. Each value must be within the range of values described in the ContainerRequest at the time the Container is assigned to the ContainerRequest.|See "Mount types":#mount_types for more details.|
|secret_mounts|hash|Must contain the same keys as the ContainerRequest being satisfied. Each value must be within the range of values described in the ContainerRequest at the time the Container is assigned to the ContainerRequest.|Not returned in API responses. Reset to empty when state is "Complete" or "Cancelled".|
|runtime_constraints|hash|Compute resources, and access to the outside world, that are / were available to the container.
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bgentry/speakeasy v0.1.0 // indirect
+ github.com/bmatcuk/doublestar v1.3.4 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
+github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0=
+github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092 h1:0Di2onNnlN5PAyWPbqlPyN45eOQ+QW/J9eqLynt4IV4=
github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092/go.mod h1:8IzBjZCRSnsvM6MJMG8HNNtnzMl48H22rbJL2kRUJ0Y=
"errors"
"fmt"
"io"
+ "io/fs"
"os"
"path/filepath"
"sort"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"git.arvados.org/arvados.git/sdk/go/manifest"
+ "github.com/bmatcuk/doublestar"
)
type printfer interface {
keepClient IKeepClient
hostOutputDir string
ctrOutputDir string
+ globs []string
bindmounts map[string]bindmount
mounts map[string]arvados.Mount
secretMounts map[string]arvados.Mount
if err != nil {
return "", fmt.Errorf("error scanning files to copy to output: %v", err)
}
- fs, err := (&arvados.Collection{ManifestText: cp.manifest}).FileSystem(cp.client, cp.keepClient)
+ collfs, err := (&arvados.Collection{ManifestText: cp.manifest}).FileSystem(cp.client, cp.keepClient)
if err != nil {
return "", fmt.Errorf("error creating Collection.FileSystem: %v", err)
}
+ err = cp.applyGlobsToCollectionFS(collfs)
+ if err != nil {
+ return "", fmt.Errorf("error while removing non-matching files from output collection: %w", err)
+ }
+ cp.applyGlobsToFilesAndDirs()
for _, d := range cp.dirs {
- err = fs.Mkdir(d, 0777)
+ err = collfs.Mkdir(d, 0777)
if err != nil && err != os.ErrExist {
return "", fmt.Errorf("error making directory %q in output collection: %v", d, err)
}
// open so f's data can be packed with it).
dir, _ := filepath.Split(f.dst)
if dir != lastparentdir || unflushed > keepclient.BLOCKSIZE {
- if err := fs.Flush("/"+lastparentdir, dir != lastparentdir); err != nil {
+ if err := collfs.Flush("/"+lastparentdir, dir != lastparentdir); err != nil {
return "", fmt.Errorf("error flushing output collection file data: %v", err)
}
unflushed = 0
}
lastparentdir = dir
- n, err := cp.copyFile(fs, f)
+ n, err := cp.copyFile(collfs, f)
if err != nil {
return "", fmt.Errorf("error copying file %q into output collection: %v", f, err)
}
unflushed += n
}
- return fs.MarshalManifest(".")
+ return collfs.MarshalManifest(".")
+}
+
+// matchGlobs reports whether path matches at least one of the glob
+// patterns in cp.globs. A pattern that fails to compile is treated as
+// matching nothing (the error from doublestar.Match is discarded).
+func (cp *copier) matchGlobs(path string) bool {
+	// An entry in the top level of the output directory looks
+	// like "/foo", but globs look like "foo", so we strip the
+	// leading "/" before matching.
+	path = strings.TrimLeft(path, "/")
+	for _, glob := range cp.globs {
+		if match, _ := doublestar.Match(glob, path); match {
+			return true
+		}
+	}
+	return false
+}
+
+// Delete entries from cp.files that do not match cp.globs.
+//
+// Delete entries from cp.dirs that do not match cp.globs.
+//
+// Ensure parent/ancestor directories of remaining cp.files and
+// cp.dirs entries are still present in cp.dirs, even if they do not
+// match cp.globs themselves.
+func (cp *copier) applyGlobsToFilesAndDirs() {
+	// An empty glob list means "copy everything" -- nothing to do.
+	if len(cp.globs) == 0 {
+		return
+	}
+	keepdirs := make(map[string]bool)
+	for _, path := range cp.dirs {
+		if cp.matchGlobs(path) {
+			keepdirs[path] = true
+		}
+	}
+	// Keep ancestors of matched dirs. (Inserting into keepdirs
+	// while ranging over it is safe here: each matched path adds
+	// *all* of its own ancestors in one inner loop, so entries
+	// added mid-iteration never need to be visited themselves.)
+	for path := range keepdirs {
+		for i, c := range path {
+			if i > 0 && c == '/' {
+				keepdirs[path[:i]] = true
+			}
+		}
+	}
+	var keepfiles []filetodo
+	for _, file := range cp.files {
+		if cp.matchGlobs(file.dst) {
+			keepfiles = append(keepfiles, file)
+		}
+	}
+	// Keep ancestor dirs of matched files, too.
+	for _, file := range keepfiles {
+		for i, c := range file.dst {
+			if i > 0 && c == '/' {
+				keepdirs[file.dst[:i]] = true
+			}
+		}
+	}
+	// Rebuild cp.dirs sorted (so parents appear before children)
+	// and install the filtered file list.
+	cp.dirs = nil
+	for path := range keepdirs {
+		cp.dirs = append(cp.dirs, path)
+	}
+	sort.Strings(cp.dirs)
+	cp.files = keepfiles
+}
+
+// Delete files in collfs that do not match cp.globs. Also delete
+// directories that are empty (after deleting non-matching files) and
+// do not match cp.globs themselves.
+func (cp *copier) applyGlobsToCollectionFS(collfs arvados.CollectionFileSystem) error {
+	// An empty glob list means "keep everything".
+	if len(cp.globs) == 0 {
+		return nil
+	}
+	// First pass: build the set of paths to keep -- every entry
+	// that matches a glob, plus all of its ancestor directories.
+	include := make(map[string]bool)
+	err := fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+		// NOTE(review): the walk error argument is ignored in
+		// this pass; presumably any real error recurs in the
+		// second walk below and is reported there -- confirm.
+		if cp.matchGlobs(path) {
+			for i, c := range path {
+				if i > 0 && c == '/' {
+					include[path[:i]] = true
+				}
+			}
+			include[path] = true
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+	// Second pass: remove everything not marked above. The ""
+	// check skips the collection root itself.
+	err = fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+		if err != nil || path == "" {
+			return err
+		}
+		if !include[path] {
+			err := collfs.RemoveAll(path)
+			if err != nil {
+				return err
+			}
+			if ent.IsDir() {
+				// RemoveAll already deleted the whole
+				// subtree; don't descend into it.
+				return fs.SkipDir
+			}
+		}
+		return nil
+	})
+	return err
}
func (cp *copier) copyFile(fs arvados.CollectionFileSystem, f filetodo) (int64, error) {
import (
"bytes"
"io"
+ "io/fs"
"io/ioutil"
"os"
+ "sort"
"syscall"
"git.arvados.org/arvados.git/sdk/go/arvados"
c.Assert(err, check.IsNil)
c.Assert(f.Close(), check.IsNil)
}
+
+// TestApplyGlobsToFilesAndDirs checks glob filtering of the dirs/files
+// lists gathered from the host output directory, including implicit
+// retention of ancestor directories of matched entries.
+func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
+	dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
+	files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
+	for _, trial := range []struct {
+		globs []string
+		dirs  []string
+		files []string
+	}{
+		{
+			globs: []string{},
+			dirs:  append([]string{}, dirs...),
+			files: append([]string{}, files...),
+		},
+		{
+			globs: []string{"**"},
+			dirs:  append([]string{}, dirs...),
+			files: append([]string{}, files...),
+		},
+		{
+			globs: []string{"**/file111"},
+			dirs:  []string{"dir1", "dir1/dir11"},
+			files: []string{"dir1/dir11/file111"},
+		},
+		{
+			globs: []string{"nothing"},
+			dirs:  nil,
+			files: nil,
+		},
+		{
+			globs: []string{"**/dir12"},
+			dirs:  []string{"dir1", "dir1/dir12"},
+			files: nil,
+		},
+		{
+			globs: []string{"**/file*"},
+			dirs:  []string{"dir1", "dir1/dir11", "dir2"},
+			files: append([]string{}, files...),
+		},
+		{
+			globs: []string{"**/dir1[12]"},
+			dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
+			files: nil,
+		},
+		{
+			globs: []string{"**/dir1[^2]"},
+			dirs:  []string{"dir1", "dir1/dir11"},
+			files: nil,
+		},
+	} {
+		c.Logf("=== globs: %q", trial.globs)
+		cp := copier{
+			globs: trial.globs,
+			dirs:  dirs,
+		}
+		for _, path := range files {
+			cp.files = append(cp.files, filetodo{dst: path})
+		}
+		cp.applyGlobsToFilesAndDirs()
+		var gotFiles []string
+		for _, file := range cp.files {
+			gotFiles = append(gotFiles, file.dst)
+		}
+		c.Check(cp.dirs, check.DeepEquals, trial.dirs)
+		c.Check(gotFiles, check.DeepEquals, trial.files)
+	}
+}
+
+// TestApplyGlobsToCollectionFS checks glob-based pruning of an
+// already-populated collection filesystem, including doublestar
+// semantics ("**", character classes) and the rule that an invalid
+// pattern matches nothing rather than causing an error.
+func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
+	for _, trial := range []struct {
+		globs  []string
+		expect []string
+	}{
+		{
+			globs:  nil,
+			expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"foo"},
+			expect: []string{"foo"},
+		},
+		{
+			globs:  []string{"baz/parent1/item1"},
+			expect: []string{"baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"**"},
+			expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"**/*"},
+			expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"*"},
+			expect: []string{"foo", "bar"},
+		},
+		{
+			globs:  []string{"baz"},
+			expect: nil,
+		},
+		{
+			globs:  []string{"b*/**"},
+			expect: []string{"baz/quux", "baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"baz"},
+			expect: nil,
+		},
+		{
+			globs:  []string{"baz/**"},
+			expect: []string{"baz/quux", "baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"baz/*"},
+			expect: []string{"baz/quux"},
+		},
+		{
+			globs:  []string{"baz/**/*uu?"},
+			expect: []string{"baz/quux"},
+		},
+		{
+			globs:  []string{"**/*m1"},
+			expect: []string{"baz/parent1/item1"},
+		},
+		{
+			globs:  []string{"*/*/*/**/*1"},
+			expect: nil,
+		},
+		{
+			globs:  []string{"f*", "**/q*"},
+			expect: []string{"foo", "baz/quux"},
+		},
+		{
+			globs:  []string{"\\"}, // invalid pattern matches nothing
+			expect: nil,
+		},
+		{
+			globs:  []string{"\\", "foo"},
+			expect: []string{"foo"},
+		},
+		{
+			globs:  []string{"foo/**"},
+			expect: nil,
+		},
+		{
+			globs:  []string{"foo*/**"},
+			expect: nil,
+		},
+	} {
+		c.Logf("=== globs: %q", trial.globs)
+		collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
+		c.Assert(err, check.IsNil)
+		cp := copier{globs: trial.globs}
+		err = cp.applyGlobsToCollectionFS(collfs)
+		if !c.Check(err, check.IsNil) {
+			continue
+		}
+		// Collect the surviving (non-directory) paths and
+		// compare against the expected set, order-insensitively.
+		var got []string
+		fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+			if !ent.IsDir() {
+				got = append(got, path)
+			}
+			return nil
+		})
+		sort.Strings(got)
+		sort.Strings(trial.expect)
+		c.Check(got, check.DeepEquals, trial.expect)
+	}
+}
keepClient: runner.ContainerKeepClient,
hostOutputDir: runner.HostOutputDir,
ctrOutputDir: runner.Container.OutputPath,
+ globs: runner.Container.OutputGlob,
bindmounts: bindmounts,
mounts: runner.Container.Mounts,
secretMounts: runner.SecretMounts,
"state": s.cr.State,
"command": s.cr.Command,
"output_path": s.cr.OutputPath,
+ "output_glob": s.cr.OutputGlob,
"container_image": s.cr.ContainerImage,
"mounts": s.cr.Mounts,
"runtime_constraints": s.cr.RuntimeConstraints,
c.Check(s.logFiles["crunch-run.txt"], Matches, `(?ms).*loaded config file \Q`+os.Getenv("ARVADOS_CONFIG")+`\E\n.*`)
}
+// TestRunTrivialContainerWithOutputGlob runs the trivial container
+// with output_glob ["js?n"]: the "json" output file must be kept,
+// while the non-matching "inputfile" and "emptydir" entries must be
+// absent from the output collection.
+func (s *integrationSuite) TestRunTrivialContainerWithOutputGlob(c *C) {
+	s.cr.OutputGlob = []string{"js?n"}
+	s.testRunTrivialContainer(c)
+	fs, err := s.outputCollection.FileSystem(s.client, s.kc)
+	c.Assert(err, IsNil)
+	_, err = fs.Stat("json")
+	c.Check(err, IsNil)
+	_, err = fs.Stat("inputfile")
+	c.Check(err, Equals, os.ErrNotExist)
+	_, err = fs.Stat("emptydir")
+	c.Check(err, Equals, os.ErrNotExist)
+}
+
func (s *integrationSuite) testRunTrivialContainer(c *C) {
if err := exec.Command("which", s.engine).Run(); err != nil {
c.Skip(fmt.Sprintf("%s: %s", s.engine, err))
var output arvados.Collection
err = s.client.RequestAndDecode(&output, "GET", "arvados/v1/collections/"+s.cr.OutputUUID, nil, nil)
c.Assert(err, IsNil)
- fs, err = output.FileSystem(s.client, s.kc)
- c.Assert(err, IsNil)
- if f, err := fs.Open("inputfile"); c.Check(err, IsNil) {
- defer f.Close()
- buf, err := ioutil.ReadAll(f)
- c.Check(err, IsNil)
- c.Check(string(buf), Equals, "inputdata")
- }
- if f, err := fs.Open("json"); c.Check(err, IsNil) {
- defer f.Close()
- buf, err := ioutil.ReadAll(f)
- c.Check(err, IsNil)
- c.Check(string(buf), Equals, `["foo",{"foo":"bar"},null]`)
- }
- if fi, err := fs.Stat("emptydir"); c.Check(err, IsNil) {
- c.Check(fi.IsDir(), Equals, true)
- }
- if d, err := fs.Open("emptydir"); c.Check(err, IsNil) {
- defer d.Close()
- fis, err := d.Readdir(-1)
+ s.outputCollection = output
+
+ if len(s.cr.OutputGlob) == 0 {
+ fs, err = output.FileSystem(s.client, s.kc)
c.Assert(err, IsNil)
- // crunch-run still saves a ".keep" file to preserve
- // empty dirs even though that shouldn't be
- // necessary. Ideally we would do:
- // c.Check(fis, HasLen, 0)
- for _, fi := range fis {
- c.Check(fi.Name(), Equals, ".keep")
+ if f, err := fs.Open("inputfile"); c.Check(err, IsNil) {
+ defer f.Close()
+ buf, err := ioutil.ReadAll(f)
+ c.Check(err, IsNil)
+ c.Check(string(buf), Equals, "inputdata")
+ }
+ if f, err := fs.Open("json"); c.Check(err, IsNil) {
+ defer f.Close()
+ buf, err := ioutil.ReadAll(f)
+ c.Check(err, IsNil)
+ c.Check(string(buf), Equals, `["foo",{"foo":"bar"},null]`)
+ }
+ if fi, err := fs.Stat("emptydir"); c.Check(err, IsNil) {
+ c.Check(fi.IsDir(), Equals, true)
+ }
+ if d, err := fs.Open("emptydir"); c.Check(err, IsNil) {
+ defer d.Close()
+ fis, err := d.Readdir(-1)
+ c.Assert(err, IsNil)
+ // crunch-run still saves a ".keep" file to preserve
+ // empty dirs even though that shouldn't be
+ // necessary. Ideally we would do:
+ // c.Check(fis, HasLen, 0)
+ for _, fi := range fis {
+ c.Check(fi.Name(), Equals, ".keep")
+ }
}
}
- s.outputCollection = output
}
Mounts map[string]Mount `json:"mounts"`
Output string `json:"output"`
OutputPath string `json:"output_path"`
+ OutputGlob []string `json:"output_glob"`
Priority int64 `json:"priority"`
RuntimeConstraints RuntimeConstraints `json:"runtime_constraints"`
State ContainerState `json:"state"`
Cwd string `json:"cwd"`
Command []string `json:"command"`
OutputPath string `json:"output_path"`
+ OutputGlob []string `json:"output_glob"`
OutputName string `json:"output_name"`
OutputTTL int `json:"output_ttl"`
Priority int `json:"priority"`
},
"subrequests_cost": {
"type": "float"
+ },
+ "output_glob": {
+ "type": "Array"
}
}
},
},
"cumulative_cost": {
"type": "float"
+ },
+ "output_glob": {
+ "type": "Array"
}
}
},
serialize :runtime_constraints, Hash
serialize :command, Array
serialize :scheduling_parameters, Hash
+ serialize :output_glob, Array
after_find :fill_container_defaults_after_find
before_validation :fill_field_defaults, :if => :new_record?
t.add :mounts
t.add :output
t.add :output_path
+ t.add :output_glob
t.add :priority
t.add :progress
t.add :runtime_constraints
cwd: req.cwd,
environment: req.environment,
output_path: req.output_path,
+ output_glob: req.output_glob,
container_image: resolve_container_image(req.container_image),
mounts: resolve_mounts(req.mounts),
runtime_constraints: resolve_runtime_constraints(req.runtime_constraints),
candidates = candidates.where('output_path = ?', attrs[:output_path])
log_reuse_info(candidates) { "after filtering on output_path #{attrs[:output_path].inspect}" }
+ candidates = candidates.where_serialized(:output_glob, attrs[:output_glob], md5: true)
+ log_reuse_info(candidates) { "after filtering on output_glob #{attrs[:output_glob].inspect}" }
+
image = resolve_container_image(attrs[:container_image])
candidates = candidates.where('container_image = ?', image)
log_reuse_info(candidates) { "after filtering on container_image #{image.inspect} (resolved from #{attrs[:container_image].inspect})" }
self.environment ||= {}
self.runtime_constraints ||= {}
self.mounts ||= {}
+ self.output_glob ||= []
self.cwd ||= "."
self.priority ||= 0
self.scheduling_parameters ||= {}
if self.new_record?
permitted.push(:owner_uuid, :command, :container_image, :cwd,
- :environment, :mounts, :output_path, :priority,
- :runtime_constraints, :scheduling_parameters,
- :secret_mounts, :runtime_token,
- :runtime_user_uuid, :runtime_auth_scopes,
- :output_storage_classes)
+ :environment, :mounts, :output_path, :output_glob,
+ :priority, :runtime_constraints,
+ :scheduling_parameters, :secret_mounts,
+ :runtime_token, :runtime_user_uuid,
+ :runtime_auth_scopes, :output_storage_classes)
end
case self.state
cwd: self.cwd,
environment: self.environment,
output_path: self.output_path,
+ output_glob: self.output_glob,
container_image: self.container_image,
mounts: self.mounts,
runtime_constraints: self.runtime_constraints,
serialize :runtime_constraints, Hash
serialize :command, Array
serialize :scheduling_parameters, Hash
+ serialize :output_glob, Array
after_find :fill_container_defaults_after_find
after_initialize { @state_was_when_initialized = self.state_was } # see finalize_if_needed
t.add :name
t.add :output_name
t.add :output_path
+ t.add :output_glob
t.add :output_uuid
t.add :output_ttl
t.add :priority
AttrsPermittedAlways = [:owner_uuid, :state, :name, :description, :properties]
AttrsPermittedBeforeCommit = [:command, :container_count_max,
:container_image, :cwd, :environment, :filters, :mounts,
- :output_path, :priority, :runtime_token,
+ :output_path, :output_glob, :priority, :runtime_token,
:runtime_constraints, :state, :container_uuid, :use_existing,
:scheduling_parameters, :secret_mounts, :output_name, :output_ttl,
:output_storage_classes, :output_properties]
end
def self.full_text_searchable_columns
- super - ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token", "output_storage_classes"]
+ super - ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token", "output_storage_classes", "output_glob"]
end
def set_priority_zero
self.container_count_max ||= Rails.configuration.Containers.MaxRetryAttempts
self.scheduling_parameters ||= {}
self.output_ttl ||= 0
+ self.output_glob ||= []
self.priority ||= 0
end
errors.add(:environment, "must be an map of String to String but has entry #{k.class} to #{v.class}")
end
end
+ output_glob.each do |g|
+ if !g.is_a? String
+ errors.add(:output_glob, "must be an array of strings but has entry #{g.class}")
+ end
+ end
[:mounts, :secret_mounts].each do |m|
self[m].each do |k, v|
if !k.is_a?(String) || !v.is_a?(Hash)
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Adds the output_glob column -- a serialized array of glob patterns,
+# defaulting to the empty array -- to both containers and
+# container_requests.
+class AddOutputGlobToContainers < ActiveRecord::Migration[7.0]
+  def change
+    add_column :containers, :output_glob, :text, default: '[]'
+    add_column :container_requests, :output_glob, :text, default: '[]'
+  end
+end
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require './db/migrate/20161213172944_full_text_search_indexes'
+
+# Rebuilds the container-reuse index so it also covers
+# md5(output_glob), and regenerates the container_requests full-text
+# search index via FullTextSearchIndexes#replace_index.
+class AddOutputGlobIndexToContainers < ActiveRecord::Migration[4.2]
+  def up
+    ActiveRecord::Base.connection.execute 'DROP INDEX index_containers_on_reuse_columns'
+    ActiveRecord::Base.connection.execute 'CREATE INDEX index_containers_on_reuse_columns on containers (md5(command), cwd, md5(environment), output_path, md5(output_glob), container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints))'
+    FullTextSearchIndexes.new.replace_index('container_requests')
+  end
+  # Restores the pre-migration reuse index (without output_glob).
+  def down
+    ActiveRecord::Base.connection.execute 'DROP INDEX index_containers_on_reuse_columns'
+    ActiveRecord::Base.connection.execute 'CREATE INDEX index_containers_on_reuse_columns on containers (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints))'
+  end
+end
runtime_token text,
output_storage_classes jsonb DEFAULT '["default"]'::jsonb,
output_properties jsonb DEFAULT '{}'::jsonb,
- cumulative_cost double precision DEFAULT 0.0 NOT NULL
+ cumulative_cost double precision DEFAULT 0.0 NOT NULL,
+ output_glob text DEFAULT '[]'::text
);
output_storage_classes jsonb DEFAULT '["default"]'::jsonb,
output_properties jsonb DEFAULT '{}'::jsonb,
cost double precision DEFAULT 0.0 NOT NULL,
- subrequests_cost double precision DEFAULT 0.0 NOT NULL
+ subrequests_cost double precision DEFAULT 0.0 NOT NULL,
+ output_glob text DEFAULT '[]'::text
);
CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
+--
+-- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text) || ' '::text) || COALESCE((output_properties)::text, ''::text)) || ' '::text) || COALESCE(output_glob, ''::text)), 0, 8000)));
+
+
--
-- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: -
--
-- Name: index_containers_on_reuse_columns; Type: INDEX; Schema: public; Owner: -
--
-CREATE INDEX index_containers_on_reuse_columns ON public.containers USING btree (md5(command), cwd, md5(environment), output_path, container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints));
+CREATE INDEX index_containers_on_reuse_columns ON public.containers USING btree (md5(command), cwd, md5(environment), output_path, md5(output_glob), container_image, md5(mounts), secret_mounts_md5, md5(runtime_constraints));
--
('20230815160000'),
('20230821000000'),
('20230922000000'),
-('20231013000000');
+('20231013000000'),
+('20240329173437'),
+('20240402162733');
{"mounts" => {"FOO" => {}}},
{"mounts" => {"FOO" => {"kind" => "tmp", "capacity" => 42.222}}},
{"command" => ["echo", 55]},
- {"environment" => {"FOO" => 55}}
+ {"environment" => {"FOO" => 55}},
+ {"output_glob" => [false]},
+ {"output_glob" => [["bad"]]},
+ {"output_glob" => "bad"},
+ {"output_glob" => ["nope", -1]},
].each do |value|
test "Create with invalid #{value}" do
set_user_from_auth :active
- assert_raises(ActiveRecord::RecordInvalid) do
+ assert_raises(ActiveRecord::RecordInvalid, Serializer::TypeMismatch) do
cr = create_minimal_req!({state: "Committed",
priority: 1}.merge(value))
cr.save!
set_user_from_auth :active
cr = create_minimal_req!(state: "Uncommitted", priority: 1)
cr.save!
- assert_raises(ActiveRecord::RecordInvalid) do
+ assert_raises(ActiveRecord::RecordInvalid, Serializer::TypeMismatch) do
cr = ContainerRequest.find_by_uuid cr.uuid
cr.update!({state: "Committed",
priority: 1}.merge(value))
cwd: "test",
command: ["echo", "hello"],
output_path: "test",
+ output_glob: [],
runtime_constraints: {
"API" => false,
"keep_cache_disk" => 0,
environment: {},
mounts: {},
output_path: "test",
+ output_glob: [],
runtime_auth_scopes: ["all"],
runtime_constraints: {
"API" => false,