h3. Storage management notes
-The "keep-balance":{{site.baseurl}}/install/install-keep-balance.html service is responsible for deciding which blocks should be placed on which keepstore volumes. As part of the rebalancing behavior, it will determine where a block should go in order to satisfy the desired storage classes, and issue pull requests to copy the block from its original volume to the desired volume. The block will subsequently be moved to trash on the original volume.
+When uploading data, if a data block cannot be uploaded to all desired storage classes, it will result in a fatal error. Data blocks will not be uploaded to volumes that do not have the desired storage class.
-If a block appears in multiple collections with different storage classes, the block will be stored in separate volumes for each storage class, even if that results in overreplication, unless there is a volume which has all the desired storage classes.
+If you change the storage classes for a collection, the data is not moved immediately. The "keep-balance":{{site.baseurl}}/install/install-keep-balance.html service is responsible for deciding which blocks should be placed on which keepstore volumes. As part of the rebalancing behavior, it will determine where a block should go in order to satisfy the desired storage classes, and issue pull requests to copy the block from its original volume to the desired volume. The block will subsequently be moved to trash on the original volume.
-If a collection has a desired storage class which is not available in any keepstore volume, the collection's blocks will remain in place, and an error will appear in the @keep-balance@ logs.
+If a block is assigned to multiple storage classes, the block will be stored on @desired_replication@ number of volumes for each storage class, even if that results in overreplication.
-This feature does not provide a hard guarantee on where data will be stored. Data may be written to default storage and moved to the desired storage class later. If controlling data locality is a hard requirement (such as legal restrictions on the location of data) we recommend setting up multiple Arvados clusters.
+If a collection has a desired storage class which is not available in any keepstore volume, the collection's blocks will remain in place, and an error will appear in the @keep-balance@ logs.
|runtime_token|string|A v2 token to be passed into the container itself, used to access Keep-backed mounts, etc. |Not returned in API responses. Reset to null when state is "Complete" or "Cancelled".|
|runtime_user_uuid|string|The user permission that will be granted to this container.||
|runtime_auth_scopes|array of string|The scopes associated with the auth token used to run this container.||
+|output_storage_classes|array of strings|The storage classes that will be used for the log and output collections of this container request|default is ["default"]|
h2(#priority). Priority
|runtime_token|string|A v2 token to be passed into the container itself, used to access Keep-backed mounts, etc.|Not returned in API responses. Reset to null when state is "Complete" or "Cancelled".|
|gateway_address|string|Address (host:port) of gateway server.|Internal use only.|
|interactive_session_started|boolean|Indicates whether @arvados-client shell@ has been used to run commands in the container, which may have altered the container's behavior and output.||
+|output_storage_classes|array of strings|The storage classes that will be used for the log and output collections of this container||
h2(#container_states). Container states
ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error)
LocalLocator(locator string) (string, error)
ClearBlockCache()
+ SetStorageClasses(sc []string)
}
// NewLogWriter is a factory function to create a new log writer.
"--foreground",
"--allow-other",
"--read-write",
+ "--storage-classes", strings.Join(runner.Container.OutputStorageClasses, ","),
fmt.Sprintf("--crunchstat-interval=%v", runner.statInterval.Seconds())}
if runner.Container.RuntimeConstraints.KeepCacheRAM > 0 {
return fmt.Errorf("error creating container API client: %v", err)
}
+ runner.ContainerKeepClient.SetStorageClasses(runner.Container.OutputStorageClasses)
+ runner.DispatcherKeepClient.SetStorageClasses(runner.Container.OutputStorageClasses)
+
err = runner.ContainerArvClient.Call("GET", "containers", runner.Container.UUID, "secret_mounts", nil, &sm)
if err != nil {
if apierr, ok := err.(arvadosclient.APIServerError); !ok || apierr.HttpStatusCode != 404 {
var _ = Suite(&TestSuite{})
type TestSuite struct {
- client *arvados.Client
- api *ArvTestClient
- runner *ContainerRunner
- executor *stubExecutor
- keepmount string
+ client *arvados.Client
+ api *ArvTestClient
+ runner *ContainerRunner
+ executor *stubExecutor
+ keepmount string
+ testDispatcherKeepClient KeepTestClient
+ testContainerKeepClient KeepTestClient
}
func (s *TestSuite) SetUpTest(c *C) {
s.executor = &stubExecutor{}
var err error
s.api = &ArvTestClient{}
- s.runner, err = NewContainerRunner(s.client, s.api, &KeepTestClient{}, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+ s.runner, err = NewContainerRunner(s.client, s.api, &s.testDispatcherKeepClient, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
c.Assert(err, IsNil)
s.runner.executor = s.executor
s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
- return s.api, &KeepTestClient{}, s.client, nil
+ return s.api, &s.testContainerKeepClient, s.client, nil
}
s.runner.RunArvMount = func(cmd []string, tok string) (*exec.Cmd, error) {
s.runner.ArvMountPoint = s.keepmount
}
type KeepTestClient struct {
- Called bool
- Content []byte
+ Called bool
+ Content []byte
+ StorageClasses []string
}
type stubExecutor struct {
client.Content = nil
}
+// SetStorageClasses records sc on the test client so tests can later
+// assert which storage classes crunch-run handed to the Keep client.
+func (client *KeepTestClient) SetStorageClasses(sc []string) {
+	client.StorageClasses = sc
+}
+
type FileWrapper struct {
io.ReadCloser
len int64
s.runner.NewLogWriter = logs.NewTestLoggingWriter
s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
s.runner.Container.Command = []string{"./hw"}
+ s.runner.Container.OutputStorageClasses = []string{"default"}
imageID, err := s.runner.LoadImage()
c.Assert(err, IsNil)
return d, err
}
s.runner.MkArvClient = func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) {
- return &ArvTestClient{secretMounts: secretMounts}, &KeepTestClient{}, nil, nil
+ return &ArvTestClient{secretMounts: secretMounts}, &s.testContainerKeepClient, nil, nil
}
if extraMounts != nil && len(extraMounts) > 0 {
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {"vcpus":1,"ram":1000000},
- "state": "Locked"
+ "state": "Locked",
+ "output_storage_classes": ["default"]
}`, nil, 0, func() {
c.Check(s.executor.created.Command, DeepEquals, []string{"echo", "hello world"})
c.Check(s.executor.created.Image, Equals, "sha256:d8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678")
c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
c.Check(s.api.Logs["stdout"].String(), Matches, ".*hello world\n")
-
+ c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"default"})
+ c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"default"})
}
func (s *TestSuite) TestRunAlreadyRunning(c *C) {
c.Check(s.api.Logs["stdout"].String(), Matches, ".*/bin\n")
}
+// TestFullRunSetOutputStorageClasses runs a container whose record sets
+// non-default output_storage_classes, and checks that the classes are
+// propagated to both the dispatcher and the container Keep clients.
+func (s *TestSuite) TestFullRunSetOutputStorageClasses(c *C) {
+	s.fullRunHelper(c, `{
+    "command": ["pwd"],
+    "container_image": "`+arvadostest.DockerImage112PDH+`",
+    "cwd": "/bin",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {},
+    "state": "Locked",
+    "output_storage_classes": ["foo", "bar"]
+}`, nil, 0, func() {
+		fmt.Fprintln(s.executor.created.Stdout, s.executor.created.WorkingDir)
+	})
+
+	c.Check(s.api.CalledWith("container.exit_code", 0), NotNil)
+	c.Check(s.api.CalledWith("container.state", "Complete"), NotNil)
+	c.Check(s.api.Logs["stdout"].String(), Matches, ".*/bin\n")
+	c.Check(s.testDispatcherKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
+	c.Check(s.testContainerKeepClient.StorageClasses, DeepEquals, []string{"foo", "bar"})
+}
+
func (s *TestSuite) TestStopOnSignal(c *C) {
s.executor.runFunc = func() {
s.executor.created.Stdout.Write([]byte("foo\n"))
cr.RunArvMount = am.ArvMountTest
cr.ContainerArvClient = &ArvTestClient{}
cr.ContainerKeepClient = &KeepTestClient{}
+ cr.Container.OutputStorageClasses = []string{"default"}
realTemp := c.MkDir()
certTemp := c.MkDir()
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
"--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.Container.Mounts["/out"] = arvados.Mount{Kind: "tmp"}
cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
cr.Container.OutputPath = "/out"
+ cr.Container.OutputStorageClasses = []string{"foo", "bar"}
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
"--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
cr.Container.OutputPath = "/tmp"
cr.Container.RuntimeConstraints.API = true
+ cr.Container.OutputStorageClasses = []string{"default"}
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
"--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
os.RemoveAll(cr.ArvMountPoint)
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
"--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
os.RemoveAll(cr.ArvMountPoint)
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
"--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
"--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
bindmounts, err := cr.SetupMounts()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
- "--read-write", "--crunchstat-interval=5",
+ "--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
"--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/tmp": {realTemp + "/tmp2", false},
FinishedAt *time.Time `json:"finished_at"` // nil if not yet finished
GatewayAddress string `json:"gateway_address"`
InteractiveSessionStarted bool `json:"interactive_session_started"`
+ OutputStorageClasses []string `json:"output_storage_classes"`
}
// ContainerRequest is an arvados#container_request resource.
ExpiresAt time.Time `json:"expires_at"`
Filters []Filter `json:"filters"`
ContainerCount int `json:"container_count"`
+ OutputStorageClasses []string `json:"output_storage_classes"`
}
// Mount is special behavior to attach to a filesystem path or device.
kc.cache().Clear()
}
+// SetStorageClasses sets the list of storage classes the client will
+// request when writing data blocks.
+func (kc *KeepClient) SetStorageClasses(sc []string) {
+	// make a copy so the caller can't mess with it.
+	kc.StorageClasses = append([]string{}, sc...)
+}
+
var (
// There are four global http.Client objects for the four
// possible permutations of TLS behavior (verify/skip-verify)
attribute :secret_mounts, :jsonbHash, default: {}
attribute :runtime_status, :jsonbHash, default: {}
attribute :runtime_auth_scopes, :jsonbHash, default: {}
+ attribute :output_storage_classes, :jsonbArray, default: ["default"]
serialize :environment, Hash
serialize :mounts, Hash
t.add :lock_count
t.add :gateway_address
t.add :interactive_session_started
+ t.add :output_storage_classes
end
# Supported states for a container
end
def self.full_text_searchable_columns
- super - ["secret_mounts", "secret_mounts_md5", "runtime_token", "gateway_address"]
+ super - ["secret_mounts", "secret_mounts_md5", "runtime_token", "gateway_address", "output_storage_classes"]
end
def self.searchable_columns *args
- super - ["secret_mounts_md5", "runtime_token", "gateway_address"]
+ super - ["secret_mounts_md5", "runtime_token", "gateway_address", "output_storage_classes"]
end
def logged_attributes
secret_mounts: req.secret_mounts,
runtime_token: req.runtime_token,
runtime_user_uuid: runtime_user.uuid,
- runtime_auth_scopes: runtime_auth_scopes
+ runtime_auth_scopes: runtime_auth_scopes,
+ output_storage_classes: req.output_storage_classes,
}
end
act_as_system_user do
:environment, :mounts, :output_path, :priority,
:runtime_constraints, :scheduling_parameters,
:secret_mounts, :runtime_token,
- :runtime_user_uuid, :runtime_auth_scopes)
+ :runtime_user_uuid, :runtime_auth_scopes,
+ :output_storage_classes)
end
case self.state
# already know how to properly treat them.
attribute :properties, :jsonbHash, default: {}
attribute :secret_mounts, :jsonbHash, default: {}
+ attribute :output_storage_classes, :jsonbArray, default: ["default"]
serialize :environment, Hash
serialize :mounts, Hash
t.add :scheduling_parameters
t.add :state
t.add :use_existing
+ t.add :output_storage_classes
end
# Supported states for a container request
:container_image, :cwd, :environment, :filters, :mounts,
:output_path, :priority, :runtime_token,
:runtime_constraints, :state, :container_uuid, :use_existing,
- :scheduling_parameters, :secret_mounts, :output_name, :output_ttl]
+ :scheduling_parameters, :secret_mounts, :output_name, :output_ttl,
+ :output_storage_classes]
def self.limit_index_columns_read
["mounts"]
'container_uuid' => container_uuid,
},
portable_data_hash: log_col.portable_data_hash,
- manifest_text: log_col.manifest_text)
+ manifest_text: log_col.manifest_text,
+ storage_classes_desired: self.output_storage_classes
+ )
completed_coll.save_with_unique_name!
end
end
owner_uuid: self.owner_uuid,
name: coll_name,
manifest_text: "",
+ storage_classes_desired: self.output_storage_classes,
properties: {
'type' => out_type,
'container_request' => uuid,
end
def self.full_text_searchable_columns
- super - ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token"]
+ super - ["mounts", "secret_mounts", "secret_mounts_md5", "runtime_token", "output_storage_classes"]
end
protected
log_coll = Collection.new(
owner_uuid: self.owner_uuid,
name: coll_name = "Container log for request #{uuid}",
- manifest_text: "")
+ manifest_text: "",
+ storage_classes_desired: self.output_storage_classes)
end
# copy logs from old container into CR's log collection
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Adds the output_storage_classes jsonb column (default ["default"]) to
+# both container_requests and containers.
+class AddContainerOutputStorageClass < ActiveRecord::Migration[5.2]
+  def change
+    add_column :container_requests, :output_storage_classes, :jsonb, :default => ["default"]
+    add_column :containers, :output_storage_classes, :jsonb, :default => ["default"]
+  end
+end
SET default_with_oids = false;
+--
+-- Name: groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.groups (
+ id integer NOT NULL,
+ uuid character varying(255),
+ owner_uuid character varying(255),
+ created_at timestamp without time zone NOT NULL,
+ modified_by_client_uuid character varying(255),
+ modified_by_user_uuid character varying(255),
+ modified_at timestamp without time zone,
+ name character varying(255) NOT NULL,
+ description character varying(524288),
+ updated_at timestamp without time zone NOT NULL,
+ group_class character varying(255),
+ trash_at timestamp without time zone,
+ is_trashed boolean DEFAULT false NOT NULL,
+ delete_at timestamp without time zone,
+ properties jsonb DEFAULT '{}'::jsonb
+);
+
+
--
-- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: -
--
output_name character varying(255) DEFAULT NULL::character varying,
output_ttl integer DEFAULT 0 NOT NULL,
secret_mounts jsonb DEFAULT '{}'::jsonb,
- runtime_token text
+ runtime_token text,
+ output_storage_classes jsonb DEFAULT '["default"]'::jsonb
);
runtime_token text,
lock_count integer DEFAULT 0 NOT NULL,
gateway_address character varying,
- interactive_session_started boolean DEFAULT false NOT NULL
+ interactive_session_started boolean DEFAULT false NOT NULL,
+ output_storage_classes jsonb DEFAULT '["default"]'::jsonb
);
ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id;
---
--- Name: groups; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.groups (
- id integer NOT NULL,
- uuid character varying(255),
- owner_uuid character varying(255),
- created_at timestamp without time zone NOT NULL,
- modified_by_client_uuid character varying(255),
- modified_by_user_uuid character varying(255),
- modified_at timestamp without time zone,
- name character varying(255) NOT NULL,
- description character varying(524288),
- updated_at timestamp without time zone NOT NULL,
- group_class character varying(255),
- trash_at timestamp without time zone,
- is_trashed boolean DEFAULT false NOT NULL,
- delete_at timestamp without time zone,
- properties jsonb DEFAULT '{}'::jsonb
-);
-
-
--
-- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
('20201105190435'),
('20201202174753'),
('20210108033940'),
-('20210126183521');
+('20210126183521'),
+('20210621204455');
cr.save!
end
end
+
+  # A request that does not specify output_storage_classes should
+  # produce an output collection with the default storage class.
+  test "default output_storage_classes" do
+    act_as_user users(:active) do
+      cr = create_minimal_req!(priority: 1,
+                               state: ContainerRequest::Committed,
+                               output_name: 'foo')
+      run_container(cr)
+      cr.reload
+      output = Collection.find_by_uuid(cr.output_uuid)
+      assert_equal ["default"], output.storage_classes_desired
+    end
+  end
+
+  # Non-default output_storage_classes on the request should be copied
+  # to storage_classes_desired on both the output and log collections.
+  test "setting output_storage_classes" do
+    act_as_user users(:active) do
+      cr = create_minimal_req!(priority: 1,
+                               state: ContainerRequest::Committed,
+                               output_name: 'foo',
+                               output_storage_classes: ["foo_storage_class", "bar_storage_class"])
+      run_container(cr)
+      cr.reload
+      output = Collection.find_by_uuid(cr.output_uuid)
+      assert_equal ["foo_storage_class", "bar_storage_class"], output.storage_classes_desired
+      log = Collection.find_by_uuid(cr.log_uuid)
+      assert_equal ["foo_storage_class", "bar_storage_class"], log.storage_classes_desired
+    end
+  end
+
+  # output_storage_classes belongs to the request, not the container, so
+  # two requests that differ only in storage classes should still share
+  # (reuse) one container while their output collections differ.
+  test "reusing container with different container_request.output_storage_classes" do
+    common_attrs = {cwd: "test",
+                    priority: 1,
+                    command: ["echo", "hello"],
+                    output_path: "test",
+                    runtime_constraints: {"vcpus" => 4,
+                                          "ram" => 12000000000},
+                    mounts: {"test" => {"kind" => "json"}},
+                    environment: {"var" => "value1"},
+                    output_storage_classes: ["foo_storage_class"]}
+    set_user_from_auth :active
+    cr1 = create_minimal_req!(common_attrs.merge({state: ContainerRequest::Committed}))
+    cont1 = run_container(cr1)
+    cr1.reload
+
+    output1 = Collection.find_by_uuid(cr1.output_uuid)
+
+    # Testing with use_existing default value
+    cr2 = create_minimal_req!(common_attrs.merge({state: ContainerRequest::Uncommitted,
+                                                  output_storage_classes: ["bar_storage_class"]}))
+
+    assert_not_nil cr1.container_uuid
+    assert_nil cr2.container_uuid
+
+    # Update cr2 to committed state, check for reuse, then run it
+    cr2.update_attributes!({state: ContainerRequest::Committed})
+    assert_equal cr1.container_uuid, cr2.container_uuid
+
+    cr2.reload
+    output2 = Collection.find_by_uuid(cr2.output_uuid)
+
+    # the original CR output has the original storage class,
+    # but the second CR output has the new storage class.
+    assert_equal ["foo_storage_class"], cont1.output_storage_classes
+    assert_equal ["foo_storage_class"], output1.storage_classes_desired
+    assert_equal ["bar_storage_class"], output2.storage_classes_desired
+  end
end
for _, sc := range strings.Split(req.Header.Get("X-Keep-Storage-Classes"), ",") {
scl = append(scl, strings.Trim(sc, " "))
}
- kc.StorageClasses = scl
+ kc.SetStorageClasses(scl)
}
_, err = fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &expectLength)