cudaComputeCapabilityMin: "9.0"
deviceCountMin: 1
deviceCountMax: 1
+
+ arv:UsePreemptible:
+ usePreemptible: true
{% endcodeblock %}
h2(#RunInSingleContainer). arv:RunInSingleContainer
|deviceCountMin|integer|Minimum number of GPU devices to allocate on a single node. Required.|
|deviceCountMax|integer|Maximum number of GPU devices to allocate on a single node. Optional. If not specified, same as @deviceCountMin@.|
+h2(#UsePreemptible). arv:UsePreemptible
+
+Specify whether a workflow step should request preemptible (e.g. AWS Spot market) instances. Such instances are generally cheaper, but can be taken back by the cloud provider at any time (preempted), causing the step to fail. When this happens, Arvados will automatically retry the step, up to the number of times given by the configuration value @Containers.MaxRetryAttempts@ (default 3).
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|usePreemptible|boolean|Required. True to opt in to using preemptible instances, false to opt out.|
+
h2. arv:dockerCollectionPDH
This is an optional extension field appearing on the standard @DockerRequirement@. It specifies the portable data hash of the Arvados collection containing the Docker image. If present, it takes precedence over @dockerPull@ or @dockerImageId@.
# If false, containers are scheduled on preemptible instances
# only when requested by the submitter.
#
- # Note that arvados-cwl-runner does not currently offer a
- # feature to request preemptible instances, so this value
- # effectively acts as a cluster-wide decision about whether to
- # use preemptible instances.
- #
# This flag is ignored if no preemptible instance types are
# configured, and has no effect on top-level containers.
AlwaysUsePreemptibleInstances: true
parser.add_argument("--http-timeout", type=int,
default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--enable-preemptible", dest="enable_preemptible", default=None, action="store_true", help="Use preemptible instances. Control individual steps with arv:UsePreemptible hint.")
+ exgroup.add_argument("--disable-preemptible", dest="enable_preemptible", default=None, action="store_false", help="Don't use preemptible instances.")
+
parser.add_argument(
"--skip-schemas",
action="store_true",
"http://arvados.org/cwl#ClusterTarget",
"http://arvados.org/cwl#OutputStorageClass",
"http://arvados.org/cwl#ProcessProperties",
- "http://commonwl.org/cwltool#CUDARequirement"
+ "http://commonwl.org/cwltool#CUDARequirement",
+ "http://arvados.org/cwl#UsePreemptible",
])
def exit_signal_handler(sigcode, frame):
doc: |
Maximum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMin`.
+
+- name: UsePreemptible
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify whether a workflow step should opt in to or opt out of using preemptible (spot) instances.
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:UsePreemptible'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ usePreemptible: boolean
doc: |
Maximum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMin`.
+
+- name: UsePreemptible
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify whether a workflow step should opt in to or opt out of using preemptible (spot) instances.
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:UsePreemptible'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ usePreemptible: boolean
doc: |
Maximum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMin`.
+
+- name: UsePreemptible
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify whether a workflow step should opt in to or opt out of using preemptible (spot) instances.
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:UsePreemptible'"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ usePreemptible: boolean
"hardware_capability": aslist(cuda_req["cudaComputeCapability"])[0]
}
+ if runtimeContext.enable_preemptible is False:
+ scheduling_parameters["preemptible"] = False
+ else:
+ preemptible_req, _ = self.get_requirement("http://arvados.org/cwl#UsePreemptible")
+ if preemptible_req:
+ scheduling_parameters["preemptible"] = preemptible_req["usePreemptible"]
+ elif runtimeContext.enable_preemptible is True:
+ scheduling_parameters["preemptible"] = True
+ elif runtimeContext.enable_preemptible is None:
+ pass
+
if self.timelimit is not None and self.timelimit > 0:
scheduling_parameters["max_run_time"] = self.timelimit
if self.enable_dev:
command.append("--enable-dev")
+ if runtimeContext.enable_preemptible is True:
+ command.append("--enable-preemptible")
+
+ if runtimeContext.enable_preemptible is False:
+ command.append("--disable-preemptible")
+
command.extend([workflowpath, "/var/lib/cwl/cwl.input.json"])
container_req["command"] = command
self.always_submit_runner = False
self.collection_cache_size = 256
self.match_local_docker = False
+ self.enable_preemptible = None
super(ArvRuntimeContext, self).__init__(kwargs)
import arvados.collection
from .util import collectionUUID
-import ruamel.yaml as yaml
+from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap, CommentedSeq
import arvados_cwl.arvdocker
textIO = StringIO(text.decode('utf-8'))
else:
textIO = StringIO(text)
- return yaml.safe_load(textIO)
+ yamlloader = YAML(typ='safe', pure=True)
+ return yamlloader.load(textIO)
else:
return {}
body=JsonDiffMatcher(container_request))
+ # The test passes no builder.resources
+ # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+ @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+ def test_run_preemptible_hint(self, keepdocker):
+ # Exercise all nine combinations of runtimeContext.enable_preemptible
+ # (None, True, False) and the arv:UsePreemptible hint (absent, true,
+ # false), and check the scheduling_parameters of the container
+ # request created for each combination.
+ arvados_cwl.add_arv_hints()
+ for enable_preemptible in (None, True, False):
+ for preemptible_hint in (None, True, False):
+ arv_docker_clear_cache()
+
+ # Fresh mocked runner for every combination, so that
+ # assert_called_with below only sees this iteration's call.
+ runner = mock.MagicMock()
+ runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 0
+ runner.secret_store = cwltool.secrets.SecretStore()
+ runner.api._rootDesc = {"revision": "20210628"}
+
+ keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
+ runner.api.collections().get().execute.return_value = {
+ "portable_data_hash": "99999999999999999999999999999993+99"}
+
+ # Attach the hint only when this combination calls for one;
+ # None stands for "no UsePreemptible hint on the tool".
+ if preemptible_hint is not None:
+ hints = [{
+ "class": "http://arvados.org/cwl#UsePreemptible",
+ "usePreemptible": preemptible_hint
+ }]
+ else:
+ hints = []
+
+ tool = cmap({
+ "inputs": [],
+ "outputs": [],
+ "baseCommand": "ls",
+ "arguments": [{"valueFrom": "$(runtime.outdir)"}],
+ "id": "",
+ "class": "CommandLineTool",
+ "cwlVersion": "v1.2",
+ "hints": hints
+ })
+
+ loadingContext, runtimeContext = self.helper(runner)
+
+ runtimeContext.name = 'test_run_enable_preemptible_'+str(enable_preemptible)+str(preemptible_hint)
+ runtimeContext.enable_preemptible = enable_preemptible
+
+ arvtool = cwltool.load_tool.load_tool(tool, loadingContext)
+ arvtool.formatgraph = None
+
+ # Test the interactions between --enable/disable-preemptible
+ # and UsePreemptible hint
+ #
+ # Expected outcome: with no command-line setting the hint alone
+ # decides, and 'preemptible' is omitted when there is no hint.
+ # With a command-line setting and no hint, the setting is used.
+ # With both, "enable_preemptible and preemptible_hint" means
+ # False always wins, while True defers to the hint's value.
+
+ if enable_preemptible is None:
+ if preemptible_hint is None:
+ sched = {}
+ else:
+ sched = {'preemptible': preemptible_hint}
+ else:
+ if preemptible_hint is None:
+ sched = {'preemptible': enable_preemptible}
+ else:
+ sched = {'preemptible': enable_preemptible and preemptible_hint}
+
+ # Run the job and compare the submitted container request
+ # against the expected body; only 'name', 'output_name' and
+ # 'scheduling_parameters' vary between iterations.
+ for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+ j.run(runtimeContext)
+ runner.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher({
+ 'environment': {
+ 'HOME': '/var/spool/cwl',
+ 'TMPDIR': '/tmp'
+ },
+ 'name': runtimeContext.name,
+ 'runtime_constraints': {
+ 'vcpus': 1,
+ 'ram': 268435456
+ },
+ 'use_existing': True,
+ 'priority': 500,
+ 'mounts': {
+ '/tmp': {'kind': 'tmp',
+ "capacity": 1073741824
+ },
+ '/var/spool/cwl': {'kind': 'tmp',
+ "capacity": 1073741824 }
+ },
+ 'state': 'Committed',
+ 'output_name': 'Output for step '+runtimeContext.name,
+ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+ 'output_path': '/var/spool/cwl',
+ 'output_ttl': 0,
+ 'container_image': '99999999999999999999999999999993+99',
+ 'command': ['ls', '/var/spool/cwl'],
+ 'cwd': '/var/spool/cwl',
+ 'scheduling_parameters': sched,
+ 'properties': {},
+ 'secret_mounts': {},
+ 'output_storage_classes': ["default"]
+ }))
+
+
class TestWorkflow(unittest.TestCase):
def setUp(self):
self.assertEqual(exited, 0)
+ @stubs
+ def test_submit_enable_preemptible(self, stubs):
+ # Submitting with --enable-preemptible should forward the same flag
+ # in the arvados-cwl-runner command of the container request created.
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--enable-preemptible",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+ # Start from the stock expected request and override only the
+ # command, which must carry '--enable-preemptible' ahead of the
+ # workflow and input paths.
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers',
+ '--no-log-timestamps', '--disable-validate', '--disable-color',
+ '--eval-timeout=20', '--thread-count=0',
+ '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
+ '--enable-preemptible',
+ '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ # The only stdout output is the container request UUID and a newline.
+ self.assertEqual(stubs.capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+ self.assertEqual(exited, 0)
+
+ @stubs
+ def test_submit_disable_preemptible(self, stubs):
+ # Submitting with --disable-preemptible should forward the same flag
+ # in the arvados-cwl-runner command of the container request created.
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--disable-preemptible",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+ # Start from the stock expected request and override only the
+ # command, which must carry '--disable-preemptible' ahead of the
+ # workflow and input paths.
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers',
+ '--no-log-timestamps', '--disable-validate', '--disable-color',
+ '--eval-timeout=20', '--thread-count=0',
+ '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue',
+ '--disable-preemptible',
+ '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ # The only stdout output is the container request UUID and a newline.
+ self.assertEqual(stubs.capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+ self.assertEqual(exited, 0)
+
+
class TestCreateWorkflow(unittest.TestCase):
existing_workflow_uuid = "zzzzz-7fd4e-validworkfloyml"
expect_workflow = StripYAMLComments(