* Blocks within a stream are ordered based on order of file tokens of the stream. A given block is listed at most once in a stream.
* Filename must not contain @"/"@ (the stream name represents the path prefix)
+h3. Estimating manifest size
+
+Here's a formula for estimating manifest size as stored in the database, assuming efficiently packed blocks.
+
+<pre>
+manifest_size =
+ + (total data size / 64 MB) * 40
+ + sum(number of files * 20)
+ + sum(size of all directory paths)
+ + sum(size of all file names)
+</pre>
+
+Here is the estimated size when block signatures are included. The block signatures authorize access to fetch each block from a Keep server, as <a href="#token_signatures">described below</a>. The signed manifest text is what is actually transferred to/from the API server and stored in RAM by @arv-mount@. The effective upper limit on how large a collection manifest can be is determined by @API.MaxRequestSize@ in @config.yml@ as well as by the maximum request size configured in your reverse proxy or load balancer (e.g. @client_max_body_size@ in Nginx).
+
+<pre>
+manifest_size =
+ + (total data size / 64 MB) * 94
+ + sum(number of files * 20)
+ + sum(size of all directory paths)
+ + sum(size of all file names)
+</pre>
+
h3. Example manifests
A manifest with four files in two directories:
|@d41d8cd98f00b204e9800998ecf8427e+0+z@|Hint does not start with uppercase letter|
|@d41d8cd98f00b204e9800998ecf8427e+0+Zfoo*bar@|Hint contains invalid character @*@|
-h3. Token signatures
+h3(#token_signatures). Token signatures
A token signature (sign-hint) provides proof-of-access for a data block. It is computed by taking a SHA1 HMAC of the blob signing token (a shared secret between the API server and keep servers), block digest, current API token, expiration timestamp, and blob signature TTL.
if "location" not in f and "path" in f:
f["location"] = f["path"]
del f["path"]
+ normalizeFilesDirs(f)
optional_deps.append(f)
visit_class(obj["default"], ("File", "Directory"), defaults_are_optional)
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cwlVersion: v1.2
+class: CommandLineTool  # runs download_all_data.sh from the "scripts" Directory input and returns the output directory
+
+$namespaces:
+ arv: "http://arvados.org/cwl#"  # prefix for the arv: extension used under requirements
+
+requirements:
+ NetworkAccess:
+ networkAccess: true  # the tool requests network access at run time
+ arv:RuntimeConstraints:
+ outputDirType: keep_output_dir  # NOTE(review): presumably makes the output directory Keep-backed -- confirm against the Arvados CWL extension docs
+
+inputs:
+ scripts:
+ type: Directory
+ default:
+ class: Directory
+ location: scripts/  # default value is a Directory given by relative location
+outputs:
+ out:
+ type: Directory
+ outputBinding:
+ glob: "."  # collect the output directory itself as the result
+
+arguments: [$(inputs.scripts.path)/download_all_data.sh, "."]  # no baseCommand: the script is the command, "." is its only argument
#
# SPDX-License-Identifier: Apache-2.0
+set -e
+
if ! arv-get d7514270f356df848477718d58308cc4+94 > /dev/null ; then
arv-put --portable-data-hash testdir/*
fi
arv-put --portable-data-hash samples/sample1_S01_R1_001.fastq.gz
fi
+arvados-cwl-runner 18888-download_def.cwl --scripts scripts/
+
exec cwltest --test arvados-tests.yml --tool arvados-cwl-runner $@ -- --disable-reuse --compute-checksum --api=containers
"inputs": [
{
"default": {
+ "basename": "a.txt",
"class": "File",
- "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt"
+ "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt",
+ "nameext": ".txt",
+ "nameroot": "a"
},
"id": "#step1.cwl/a",
"type": "File"
},
{
"default": {
+ "basename": "b.txt",
"class": "File",
- "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt"
+ "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt",
+ "nameext": ".txt",
+ "nameroot": "b"
},
"id": "#step1.cwl/b",
"type": "File"
"inputs": [
{
"default": {
+ "basename": "b.txt",
"class": "File",
- "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt"
+ "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt",
+ "nameext": ".txt",
+ "nameroot": "b"
},
"id": "#step2.cwl/b",
"type": "File"
},
{
"default": {
+ "basename": "c.txt",
"class": "File",
- "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt"
+ "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt",
+ "nameext": ".txt",
+ "nameroot": "c"
},
"id": "#step2.cwl/c",
"type": "File"
--- /dev/null
+#!/bin/bash
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Print a fixed marker string; the script does no other work.
+echo bubble
"basename": "renamed.txt",
"class": "File",
"location": "keep:99999999999999999999999999999998+99/file1.txt"
- }]
+ }],
+ "location": "_:df80736f-f14d-4b10-b2e3-03aa27f034bb"
}
}
def stubs(func):
@functools.wraps(func)
+ @mock.patch("uuid.uuid4")
@mock.patch("arvados.commands.keepdocker.list_images_in_arv")
@mock.patch("arvados.collection.KeepClient")
@mock.patch("arvados.keep.KeepClient")
@mock.patch("arvados.events.subscribe")
- def wrapped(self, events, keep_client1, keep_client2, keepdocker, *args, **kwargs):
+ def wrapped(self, events, keep_client1, keep_client2, keepdocker, uuid4, *args, **kwargs):
class Stubs(object):
pass
stubs = Stubs()
stubs.events = events
stubs.keepdocker = keepdocker
+ uuid4.side_effect = ["df80736f-f14d-4b10-b2e3-03aa27f034bb", "df80736f-f14d-4b10-b2e3-03aa27f034b1",
+ "df80736f-f14d-4b10-b2e3-03aa27f034b2", "df80736f-f14d-4b10-b2e3-03aa27f034b3",
+ "df80736f-f14d-4b10-b2e3-03aa27f034b4", "df80736f-f14d-4b10-b2e3-03aa27f034b5"]
+
def putstub(p, **kwargs):
return "%s+%i" % (hashlib.md5(p).hexdigest(), len(p))
keep_client1().put.side_effect = putstub
self.existing_workflow_uuid + '\n')
self.assertEqual(exited, 0)
+
@stubs
def test_update_name(self, stubs):
exited = arvados_cwl.main(
"inputs": [
{
"default": {
+ "basename": "blub.txt",
"class": "File",
- "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt"
+ "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt",
+ "nameext": ".txt",
+ "nameroot": "blub"
},
"id": "#submit_tool.cwl/x",
"inputBinding": {
"nameroot": "renamed",
"size": 0
}
- ]
+ ],
+ "location": "_:df80736f-f14d-4b10-b2e3-03aa27f034b2"
},
"id": "#main/z",
"type": "Directory"
"inputs": [
{
"default": {
+ "basename": "blub.txt",
"class": "File",
- "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt"
+ "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt",
+ "nameext": ".txt",
+ "nameroot": "blub"
},
"id": "#submit_tool.cwl/x",
"inputBinding": {
"nameroot": "renamed",
"size": 0
}
- ]
+ ],
+ "location": "_:df80736f-f14d-4b10-b2e3-03aa27f034b2"
},
"id": "#main/z",
"type": "Directory"