From f5512fd7f9abe306740af464551938461033a935 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Mon, 8 Aug 2022 11:24:30 -0400 Subject: [PATCH] 18205: Add container cost accounting fields. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- .../container_requests.html.textile.liquid | 1 + .../methods/containers.html.textile.liquid | 2 + lib/config/load.go | 1 + lib/controller/integration_test.go | 2 + lib/crunchrun/crunchrun.go | 6 ++ sdk/go/arvados/container.go | 3 + services/api/app/models/container.rb | 8 ++- services/api/app/models/container_request.rb | 30 +++++++- .../20220804133317_add_cost_to_containers.rb | 11 +++ services/api/db/structure.sql | 10 ++- .../api/test/unit/container_request_test.rb | 69 ++++++++++++++++++- 11 files changed, 135 insertions(+), 8 deletions(-) create mode 100644 services/api/db/migrate/20220804133317_add_cost_to_containers.rb diff --git a/doc/api/methods/container_requests.html.textile.liquid b/doc/api/methods/container_requests.html.textile.liquid index 15fa207b1c..11f4f34fc8 100644 --- a/doc/api/methods/container_requests.html.textile.liquid +++ b/doc/api/methods/container_requests.html.textile.liquid @@ -62,6 +62,7 @@ table(table table-bordered table-condensed). |runtime_auth_scopes|array of string|The scopes associated with the auth token used to run this container.|| |output_storage_classes|array of strings|The storage classes that will be used for the log and output collections of this container request|default is ["default"]| |output_properties|hash|User metadata properties to set on the output collection. The output collection will also have default properties "type" ("intermediate" or "output") and "container_request" (the uuid of container request that produced the collection).| +|cumulative_cost|number|Estimated cost of the cloud VMs used to satisfy the request, including retried attempts and completed subrequests, but not including reused containers.|0 if container was reused or VM price information was not available.| h2(#priority). Priority diff --git a/doc/api/methods/containers.html.textile.liquid b/doc/api/methods/containers.html.textile.liquid index 43163c5550..e6891621cd 100644 --- a/doc/api/methods/containers.html.textile.liquid +++ b/doc/api/methods/containers.html.textile.liquid @@ -61,6 +61,8 @@ Generally this will contain additional keys that are not present in any correspo |interactive_session_started|boolean|Indicates whether @arvados-client shell@ has been used to run commands in the container, which may have altered the container's behavior and output.|| |output_storage_classes|array of strings|The storage classes that will be used for the log and output collections of this container|| |output_properties|hash|User metadata properties to set on the output collection.| +|cost|number|Estimated cost of the cloud VM used to run the container.|0 if not available.| +|subrequests_cost|number|Total estimated cumulative cost of container requests submitted by this container.|0 if not available.| h2(#container_states). Container states diff --git a/lib/config/load.go b/lib/config/load.go index fbd01488a0..9269ddf27f 100644 --- a/lib/config/load.go +++ b/lib/config/load.go @@ -448,6 +448,7 @@ func (ldr *Loader) setLoopbackInstanceType(cfg *arvados.Config) error { RAM: hostram, Scratch: scratch, IncludedScratch: scratch, + Price: 1.0, }} cfg.Clusters[id] = cc } diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go index b0ec4293a3..4c49c007eb 100644 --- a/lib/controller/integration_test.go +++ b/lib/controller/integration_test.go @@ -1245,6 +1245,8 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token stri time.Sleep(time.Second / 2) } } + c.Logf("cr.CumulativeCost == %f", cr.CumulativeCost) + c.Check(cr.CumulativeCost, check.Not(check.Equals), 0.0) if expectExitCode >= 0 { c.Check(ctr.State, check.Equals, arvados.ContainerStateComplete) c.Check(ctr.ExitCode, check.Equals, expectExitCode) diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go index eadf22876f..ee9115d8d8 100644 --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@ -140,6 +140,7 @@ type ContainerRunner struct { MkArvClient func(token string) (IArvadosClient, IKeepClient, *arvados.Client, error) finalState string parentTemp string + costStartTime time.Time keepstoreLogger io.WriteCloser keepstoreLogbuf *bufThenWrite @@ -1457,6 +1458,10 @@ func (runner *ContainerRunner) UpdateContainerFinal() error { if runner.finalState == "Complete" && runner.OutputPDH != nil { update["output"] = *runner.OutputPDH } + var it arvados.InstanceType + if j := os.Getenv("InstanceType"); j != "" && json.Unmarshal([]byte(j), &it) == nil && it.Price > 0 { + update["cost"] = it.Price * time.Now().Sub(runner.costStartTime).Seconds() / time.Hour.Seconds() + } return runner.DispatcherArvClient.Update("containers", runner.Container.UUID, arvadosclient.Dict{"container": update}, nil) } @@ -1489,6 +1494,7 @@ func (runner *ContainerRunner) Run() (err error) { runner.CrunchLog.Printf("Using FUSE mount: %s", v) runner.CrunchLog.Printf("Using container runtime: %s", runner.executor.Runtime()) runner.CrunchLog.Printf("Executing container: %s", runner.Container.UUID) + runner.costStartTime = time.Now() hostname, hosterr := os.Hostname() if hosterr != nil { diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go index 466221fe19..45f92017c4 100644 --- a/sdk/go/arvados/container.go +++ b/sdk/go/arvados/container.go @@ -38,6 +38,8 @@ type Container struct { RuntimeToken string `json:"runtime_token"` AuthUUID string `json:"auth_uuid"` Log string `json:"log"` + Cost float64 `json:"cost"` + SubrequestsCost float64 `json:"subrequests_cost"` } // ContainerRequest is an arvados#container_request resource. @@ -77,6 +79,7 @@ type ContainerRequest struct { ContainerCount int `json:"container_count"` OutputStorageClasses []string `json:"output_storage_classes"` OutputProperties map[string]interface{} `json:"output_properties"` + CumulativeCost float64 `json:"cumulative_cost"` } // Mount is special behavior to attach to a filesystem path or device. diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb index 43af0721c4..3e3f73b838 100644 --- a/services/api/app/models/container.rb +++ b/services/api/app/models/container.rb @@ -83,6 +83,8 @@ class Container < ArvadosModel t.add :interactive_session_started t.add :output_storage_classes t.add :output_properties + t.add :cost + t.add :subrequests_cost end # Supported states for a container @@ -478,8 +480,9 @@ class Container < ArvadosModel def validate_change permitted = [:state] - progress_attrs = [:progress, :runtime_status, :log, :output, :output_properties, :exit_code] final_attrs = [:finished_at] + progress_attrs = [:progress, :runtime_status, :subrequests_cost, :cost, + :log, :output, :output_properties, :exit_code] if self.new_record? permitted.push(:owner_uuid, :command, :container_image, :cwd, @@ -516,7 +519,7 @@ class Container < ArvadosModel when Running permitted.push :finished_at, *progress_attrs when Queued, Locked - permitted.push :finished_at, :log, :runtime_status + permitted.push :finished_at, :log, :runtime_status, :cost end else @@ -719,6 +722,7 @@ class Container < ArvadosModel cr.with_lock do leave_modified_by_user_alone do # Use row locking because this increments container_count + cr.cumulative_cost += self.cost + self.subrequests_cost cr.container_uuid = c.uuid cr.save! end diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb index 9116035905..47e2769a8c 100644 --- a/services/api/app/models/container_request.rb +++ b/services/api/app/models/container_request.rb @@ -33,6 +33,7 @@ class ContainerRequest < ArvadosModel serialize :scheduling_parameters, Hash after_find :fill_container_defaults_after_find + after_initialize { @state_was_when_initialized = self.state_was } # see finalize_if_needed before_validation :fill_field_defaults, :if => :new_record? before_validation :fill_container_defaults validates :command, :container_image, :output_path, :cwd, :presence => true @@ -80,6 +81,7 @@ class ContainerRequest < ArvadosModel t.add :use_existing t.add :output_storage_classes t.add :output_properties + t.add :cumulative_cost end # Supported states for a container request @@ -173,6 +175,30 @@ class ContainerRequest < ArvadosModel def finalize! container = Container.find_by_uuid(container_uuid) if !container.nil? + # We don't want to add the container cost if the container was + # already finished when this CR was committed. But we are + # running in an after_save hook after a lock/reload, so + # state_was has already been updated to Committed regardless. + # Hence the need for @state_was_when_initialized. + if @state_was_when_initialized == Committed + # Add the final container cost to our cumulative cost (which + # may already be non-zero from previous attempts if + # container_count_max > 1). + self.cumulative_cost += container.cost + container.subrequests_cost + end + + # Add our cumulative cost to the subrequests_cost of the + # requesting container, if any. + if self.requesting_container_uuid + Container.where( + uuid: self.requesting_container_uuid, + state: Container::Running, + ).each do |c| + c.subrequests_cost += self.cumulative_cost + c.save! + end + end + update_collections(container: container) if container.state == Container::Complete @@ -461,7 +487,7 @@ class ContainerRequest < ArvadosModel case self.state when Committed - permitted.push :priority, :container_count_max, :container_uuid + permitted.push :priority, :container_count_max, :container_uuid, :cumulative_cost if self.priority.nil? self.errors.add :priority, "cannot be nil" @@ -478,7 +504,7 @@ class ContainerRequest < ArvadosModel when Final if self.state_was == Committed # "Cancel" means setting priority=0, state=Committed - permitted.push :priority + permitted.push :priority, :cumulative_cost if current_user.andand.is_admin permitted.push :output_uuid, :log_uuid diff --git a/services/api/db/migrate/20220804133317_add_cost_to_containers.rb b/services/api/db/migrate/20220804133317_add_cost_to_containers.rb new file mode 100644 index 0000000000..188187e394 --- /dev/null +++ b/services/api/db/migrate/20220804133317_add_cost_to_containers.rb @@ -0,0 +1,11 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class AddCostToContainers < ActiveRecord::Migration[5.2] + def change + add_column :containers, :cost, :float, null: false, default: 0 + add_column :containers, :subrequests_cost, :float, null: false, default: 0 + add_column :container_requests, :cumulative_cost, :float, null: false, default: 0 + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index c5f6d567bf..42fb8b3aac 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -481,7 +481,8 @@ CREATE TABLE public.container_requests ( secret_mounts jsonb DEFAULT '{}'::jsonb, runtime_token text, output_storage_classes jsonb DEFAULT '["default"]'::jsonb, - output_properties jsonb DEFAULT '{}'::jsonb + output_properties jsonb DEFAULT '{}'::jsonb, + cumulative_cost double precision DEFAULT 0.0 NOT NULL ); @@ -545,7 +546,9 @@ CREATE TABLE public.containers ( gateway_address character varying, interactive_session_started boolean DEFAULT false NOT NULL, output_storage_classes jsonb DEFAULT '["default"]'::jsonb, - output_properties jsonb DEFAULT '{}'::jsonb + output_properties jsonb DEFAULT '{}'::jsonb, + cost double precision DEFAULT 0.0 NOT NULL, + subrequests_cost double precision DEFAULT 0.0 NOT NULL ); @@ -3182,6 +3185,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20220301155729'), ('20220303204419'), ('20220401153101'), -('20220505112900'); +('20220505112900'), +('20220804133317'); diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb index e5c0085184..e6db412179 100644 --- a/services/api/test/unit/container_request_test.rb +++ b/services/api/test/unit/container_request_test.rb @@ -231,11 +231,12 @@ class ContainerRequestTest < ActiveSupport::TestCase act_as_system_user do Container.find_by_uuid(cr.container_uuid). - update_attributes!(state: Container::Cancelled) + update_attributes!(state: Container::Cancelled, cost: 1.25) end cr.reload assert_equal "Final", cr.state + assert_equal 1.25, cr.cumulative_cost assert_equal users(:active).uuid, cr.modified_by_user_uuid end @@ -261,12 +262,14 @@ class ContainerRequestTest < ActiveSupport::TestCase log_pdh = 'fa7aeb5140e2848d39b416daeef4ffc5+45' act_as_system_user do c.update_attributes!(state: Container::Complete, + cost: 1.25, output: output_pdh, log: log_pdh) end cr.reload assert_equal "Final", cr.state + assert_equal 1.25, cr.cumulative_cost assert_equal users(:active).uuid, cr.modified_by_user_uuid assert_not_nil cr.output_uuid @@ -788,6 +791,7 @@ class ContainerRequestTest < ActiveSupport::TestCase prev_container_uuid = cr.container_uuid act_as_system_user do + c.update_attributes!(cost: 0.5, subrequests_cost: 1.25) c.update_attributes!(state: Container::Cancelled) end @@ -800,6 +804,9 @@ class ContainerRequestTest < ActiveSupport::TestCase c = act_as_system_user do c = Container.find_by_uuid(cr.container_uuid) + c.update_attributes!(state: Container::Locked) + c.update_attributes!(state: Container::Running) + c.update_attributes!(cost: 0.125) c.update_attributes!(state: Container::Cancelled) c end @@ -809,6 +816,7 @@ class ContainerRequestTest < ActiveSupport::TestCase assert_equal "Final", cr.state assert_equal prev_container_uuid, cr.container_uuid assert_not_equal cr2.container_uuid, cr.container_uuid + assert_equal 1.875, cr.cumulative_cost end test "Retry on container cancelled with runtime_token" do @@ -1511,4 +1519,63 @@ class ContainerRequestTest < ActiveSupport::TestCase end end + test "Cumulative cost includes retried attempts but not reused containers" do + set_user_from_auth :active + cr = create_minimal_req!(priority: 5, state: "Committed", container_count_max: 3) + c = Container.find_by_uuid cr.container_uuid + act_as_system_user do + c.update_attributes!(state: Container::Locked) + c.update_attributes!(state: Container::Running) + c.update_attributes!(state: Container::Cancelled, cost: 3) + end + cr.reload + assert_equal 3, cr.cumulative_cost + + c = Container.find_by_uuid cr.container_uuid + lock_and_run c + c.reload + assert_equal 0, c.subrequests_cost + + # cr2 is a child/subrequest + cr2 = with_container_auth(c) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 1, command: ["echo", "foo2"]) + end + assert_equal c.uuid, cr2.requesting_container_uuid + c2 = Container.find_by_uuid cr2.container_uuid + act_as_system_user do + c2.update_attributes!(state: Container::Locked) + c2.update_attributes!(state: Container::Running) + logc = Collection.new(owner_uuid: system_user_uuid, + manifest_text: ". ef772b2f28e2c8ca84de45466ed19ee9+7815 0:0:arv-mount.txt\n") + logc.save! + c2.update_attributes!(state: Container::Complete, + exit_code: 0, + output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45', + log: logc.portable_data_hash, + cost: 7) + end + c.reload + assert_equal 7, c.subrequests_cost + + # cr3 is an identical child/subrequest, will reuse c2 + cr3 = with_container_auth(c) do + create_minimal_req!(priority: 10, state: "Committed", container_count_max: 1, command: ["echo", "foo2"]) + end + assert_equal c.uuid, cr3.requesting_container_uuid + c3 = Container.find_by_uuid cr3.container_uuid + assert_equal c2.uuid, c3.uuid + assert_equal Container::Complete, c3.state + c.reload + assert_equal 7, c.subrequests_cost + + act_as_system_user do + c.update_attributes!(state: Container::Complete, exit_code: 0, cost: 9) + end + + c.reload + assert_equal 7, c.subrequests_cost + cr.reload + assert_equal 3+7+9, cr.cumulative_cost + end + end -- 2.30.2