From: Peter Amstutz Date: Mon, 25 Apr 2022 20:06:06 +0000 (-0400) Subject: Merge branch '17301-cwl-oom' refs #17301 X-Git-Tag: 2.5.0~196 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/ff91b02b4c768ca9f583454b0909fa030d89debe?hp=0c847cae0cb8ad2c55cf9330b84b49ce9d54b4e8 Merge branch '17301-cwl-oom' refs #17301 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/doc/architecture/manifest-format.html.textile.liquid b/doc/architecture/manifest-format.html.textile.liquid index 1780768bc3..e1057a42ea 100644 --- a/doc/architecture/manifest-format.html.textile.liquid +++ b/doc/architecture/manifest-format.html.textile.liquid @@ -60,6 +60,28 @@ A normalized manifest is a manifest that meets the following additional restrict * Blocks within a stream are ordered based on order of file tokens of the stream. A given block is listed at most once in a stream. * Filename must not contain @"/"@ (the stream name represents the path prefix) +h3. Estimating manifest size + +Here's a formula for estimating manifest size as stored in the database, assuming efficiently packed blocks. + +<pre>
+manifest_size =
+   + (total data size / 64 MB) * 40
+   + sum(number of files * 20)
+   + sum(size of all directory paths)
+   + sum(size of all file names)
+</pre>
+ +Here is the size when including block signatures. The block signatures authorize access to fetch each block from a Keep server, as described below. The signed manifest text is what is actually transferred to/from the API server and stored in RAM by @arv-mount@. The effective upper limit on how large a collection manifest can be is determined by @API.MaxRequestSize@ in @config.yml@ as well as the maximum request size configuration in your reverse proxy or load balancer (e.g. @client_max_body_size@ in Nginx). + +<pre>
+manifest_size =
+   + (total data size / 64 MB) * 94
+   + sum(number of files * 20)
+   + sum(size of all directory paths)
+   + sum(size of all file names)
+</pre>
+ h3. Example manifests A manifest with four files in two directories: @@ -122,7 +144,7 @@ table(table table-bordered table-condensed). |@d41d8cd98f00b204e9800998ecf8427e+0+z@|Hint does not start with uppercase letter| |@d41d8cd98f00b204e9800998ecf8427e+0+Zfoo*bar@|Hint contains invalid character @*@| -h3. Token signatures +h3(#token_signatures). Token signatures A token signature (sign-hint) provides proof-of-access for a data block. It is computed by taking a SHA1 HMAC of the blob signing token (a shared secret between the API server and keep servers), block digest, current API token, expiration timestamp, and blob signature TTL. diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py index e5a81cdc73..7d4310b0e0 100644 --- a/sdk/cwl/arvados_cwl/runner.py +++ b/sdk/cwl/arvados_cwl/runner.py @@ -358,6 +358,7 @@ def upload_dependencies(arvrunner, name, document_loader, if "location" not in f and "path" in f: f["location"] = f["path"] del f["path"] + normalizeFilesDirs(f) optional_deps.append(f) visit_class(obj["default"], ("File", "Directory"), defaults_are_optional) diff --git a/sdk/cwl/tests/18888-download_def.cwl b/sdk/cwl/tests/18888-download_def.cwl new file mode 100644 index 0000000000..2237c448ed --- /dev/null +++ b/sdk/cwl/tests/18888-download_def.cwl @@ -0,0 +1,29 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +cwlVersion: v1.2 +class: CommandLineTool + +$namespaces: + arv: "http://arvados.org/cwl#" + +requirements: + NetworkAccess: + networkAccess: true + arv:RuntimeConstraints: + outputDirType: keep_output_dir + +inputs: + scripts: + type: Directory + default: + class: Directory + location: scripts/ +outputs: + out: + type: Directory + outputBinding: + glob: "." 
+ +arguments: [$(inputs.scripts.path)/download_all_data.sh, "."] diff --git a/sdk/cwl/tests/arvados-tests.sh b/sdk/cwl/tests/arvados-tests.sh index 7727ebfa04..9cb5234cf0 100755 --- a/sdk/cwl/tests/arvados-tests.sh +++ b/sdk/cwl/tests/arvados-tests.sh @@ -3,6 +3,8 @@ # # SPDX-License-Identifier: Apache-2.0 +set -e + if ! arv-get d7514270f356df848477718d58308cc4+94 > /dev/null ; then arv-put --portable-data-hash testdir/* fi @@ -16,4 +18,6 @@ if ! arv-get 20850f01122e860fb878758ac1320877+71 > /dev/null ; then arv-put --portable-data-hash samples/sample1_S01_R1_001.fastq.gz fi +arvados-cwl-runner 18888-download_def.cwl --scripts scripts/ + exec cwltest --test arvados-tests.yml --tool arvados-cwl-runner $@ -- --disable-reuse --compute-checksum --api=containers diff --git a/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl b/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl index 1054d8f29b..c934274fcb 100644 --- a/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl +++ b/sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl @@ -41,16 +41,22 @@ "inputs": [ { "default": { + "basename": "a.txt", "class": "File", - "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt" + "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt", + "nameext": ".txt", + "nameroot": "a" }, "id": "#step1.cwl/a", "type": "File" }, { "default": { + "basename": "b.txt", "class": "File", - "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt" + "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt", + "nameext": ".txt", + "nameroot": "b" }, "id": "#step1.cwl/b", "type": "File" @@ -69,16 +75,22 @@ "inputs": [ { "default": { + "basename": "b.txt", "class": "File", - "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt" + "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt", + "nameext": ".txt", + "nameroot": "b" }, "id": "#step2.cwl/b", "type": "File" }, { "default": { + "basename": "c.txt", "class": "File", 
- "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt" + "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt", + "nameext": ".txt", + "nameroot": "c" }, "id": "#step2.cwl/c", "type": "File" diff --git a/sdk/cwl/tests/scripts/download_all_data.sh b/sdk/cwl/tests/scripts/download_all_data.sh new file mode 100755 index 0000000000..d3a9d78762 --- /dev/null +++ b/sdk/cwl/tests/scripts/download_all_data.sh @@ -0,0 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +#!/bin/bash + +echo bubble diff --git a/sdk/cwl/tests/submit_test_job.json b/sdk/cwl/tests/submit_test_job.json index 49d5944c06..be5f6bf1a1 100644 --- a/sdk/cwl/tests/submit_test_job.json +++ b/sdk/cwl/tests/submit_test_job.json @@ -18,6 +18,7 @@ "basename": "renamed.txt", "class": "File", "location": "keep:99999999999999999999999999999998+99/file1.txt" - }] + }], + "location": "_:df80736f-f14d-4b10-b2e3-03aa27f034bb" } } diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py index adee2d2089..5092fc4575 100644 --- a/sdk/cwl/tests/test_submit.py +++ b/sdk/cwl/tests/test_submit.py @@ -47,17 +47,22 @@ _rootDesc = None def stubs(func): @functools.wraps(func) + @mock.patch("uuid.uuid4") @mock.patch("arvados.commands.keepdocker.list_images_in_arv") @mock.patch("arvados.collection.KeepClient") @mock.patch("arvados.keep.KeepClient") @mock.patch("arvados.events.subscribe") - def wrapped(self, events, keep_client1, keep_client2, keepdocker, *args, **kwargs): + def wrapped(self, events, keep_client1, keep_client2, keepdocker, uuid4, *args, **kwargs): class Stubs(object): pass stubs = Stubs() stubs.events = events stubs.keepdocker = keepdocker + uuid4.side_effect = ["df80736f-f14d-4b10-b2e3-03aa27f034bb", "df80736f-f14d-4b10-b2e3-03aa27f034b1", + "df80736f-f14d-4b10-b2e3-03aa27f034b2", "df80736f-f14d-4b10-b2e3-03aa27f034b3", + "df80736f-f14d-4b10-b2e3-03aa27f034b4", "df80736f-f14d-4b10-b2e3-03aa27f034b5"] + def putstub(p, 
**kwargs): return "%s+%i" % (hashlib.md5(p).hexdigest(), len(p)) keep_client1().put.side_effect = putstub @@ -1614,6 +1619,7 @@ class TestCreateWorkflow(unittest.TestCase): self.existing_workflow_uuid + '\n') self.assertEqual(exited, 0) + @stubs def test_update_name(self, stubs): exited = arvados_cwl.main( diff --git a/sdk/cwl/tests/wf/expect_packed.cwl b/sdk/cwl/tests/wf/expect_packed.cwl index 4715c10a5e..42c7b251f8 100644 --- a/sdk/cwl/tests/wf/expect_packed.cwl +++ b/sdk/cwl/tests/wf/expect_packed.cwl @@ -11,8 +11,11 @@ "inputs": [ { "default": { + "basename": "blub.txt", "class": "File", - "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt" + "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt", + "nameext": ".txt", + "nameroot": "blub" }, "id": "#submit_tool.cwl/x", "inputBinding": { @@ -68,7 +71,8 @@ "nameroot": "renamed", "size": 0 } - ] + ], + "location": "_:df80736f-f14d-4b10-b2e3-03aa27f034b2" }, "id": "#main/z", "type": "Directory" diff --git a/sdk/cwl/tests/wf/expect_upload_packed.cwl b/sdk/cwl/tests/wf/expect_upload_packed.cwl index 0b13e3a819..644f87fd53 100644 --- a/sdk/cwl/tests/wf/expect_upload_packed.cwl +++ b/sdk/cwl/tests/wf/expect_upload_packed.cwl @@ -11,8 +11,11 @@ "inputs": [ { "default": { + "basename": "blub.txt", "class": "File", - "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt" + "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt", + "nameext": ".txt", + "nameroot": "blub" }, "id": "#submit_tool.cwl/x", "inputBinding": { @@ -74,7 +77,8 @@ "nameroot": "renamed", "size": 0 } - ] + ], + "location": "_:df80736f-f14d-4b10-b2e3-03aa27f034b2" }, "id": "#main/z", "type": "Directory"