X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/454ee2b8f0385c542b6f1165a3baf2820425e1a3..3911cd836c4e937262d48f9b0af703a9d7d68cdd:/services/api/app/models/container_request.rb

diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb
index 24882860eb..5a78181473 100644
--- a/services/api/app/models/container_request.rb
+++ b/services/api/app/models/container_request.rb
@@ -119,20 +119,61 @@ class ContainerRequest < ArvadosModel
   end
 
   def finalize_if_needed
-    if state == Committed && Container.find_by_uuid(container_uuid).final?
-      reload
-      act_as_system_user do
-        leave_modified_by_user_alone do
-          finalize!
+    return if state != Committed
+    while true
+      # get container lock first, then lock current container request
+      # (same order as Container#handle_completed). Locking always
+      # reloads the Container and ContainerRequest records.
+      c = Container.find_by_uuid(container_uuid)
+      c.lock!
+      self.lock!
+
+      if container_uuid != c.uuid
+        # After locking, we've noticed a race, the container_uuid is
+        # different than the container record we just loaded. This
+        # can happen if Container#handle_completed scheduled a new
+        # container for retry and set container_uuid while we were
+        # waiting on the container lock. Restart the loop and get the
+        # new container.
+        redo
+      end
+
+      if state == Committed && c.final?
+        # The current container is
+        act_as_system_user do
+          leave_modified_by_user_alone do
+            finalize!
+          end
         end
       end
+      return true
     end
   end
 
   # Finalize the container request after the container has
   # finished/cancelled.
   def finalize!
-    update_collections(container: Container.find_by_uuid(container_uuid))
+    container = Container.find_by_uuid(container_uuid)
+    update_collections(container: container)
+
+    if container.state == Container::Complete
+      log_col = Collection.where(portable_data_hash: container.log).first
+      if log_col
+        # Need to save collection
+        completed_coll = Collection.new(
+          owner_uuid: self.owner_uuid,
+          name: "Container log for container #{container_uuid}",
+          properties: {
+            'type' => 'log',
+            'container_request' => self.uuid,
+            'container_uuid' => container_uuid,
+          },
+          portable_data_hash: log_col.portable_data_hash,
+          manifest_text: log_col.manifest_text)
+        completed_coll.save_with_unique_name!
+      end
+    end
+
     update_attributes!(state: Final)
   end
 
@@ -166,6 +207,7 @@ class ContainerRequest < ArvadosModel
       end
 
       if out_type == "log"
+        # Copy the log into a merged collection
         src = Arv::Collection.new(manifest)
         dst = Arv::Collection.new(coll.manifest_text)
         dst.cp_r("./", ".", src)
@@ -196,7 +238,7 @@ class ContainerRequest < ArvadosModel
     self.mounts ||= {}
     self.secret_mounts ||= {}
     self.cwd ||= "."
-    self.container_count_max ||= Rails.configuration.Containers.MaxComputeVMs
+    self.container_count_max ||= Rails.configuration.Containers.MaxRetryAttempts
     self.scheduling_parameters ||= {}
     self.output_ttl ||= 0
     self.priority ||= 0
@@ -210,7 +252,18 @@ class ContainerRequest < ArvadosModel
       return false
     end
     if state_changed? and state == Committed and container_uuid.nil?
-      self.container_uuid = Container.resolve(self).uuid
+      while true
+        c = Container.resolve(self)
+        c.lock!
+        if c.state == Container::Cancelled
+          # Lost a race, we have a lock on the container but the
+          # container was cancelled in a different request, restart
+          # the loop and resolve request to a new container.
+          redo
+        end
+        self.container_uuid = c.uuid
+        break
+      end
     end
     if self.container_uuid != self.container_uuid_was
       if self.container_count_changed?