From e378cc27decd91bf72309946b84c7346a75be7c1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 26 Sep 2016 16:49:27 -0400 Subject: [PATCH] 10129: Trim listing from Directory objects that are keep references, because they can get very large. --- sdk/cwl/arvados_cwl/arvworkflow.py | 4 +++- sdk/cwl/arvados_cwl/runner.py | 6 ++++++ sdk/cwl/tests/submit_test_job.json | 8 ++++++++ sdk/cwl/tests/test_submit.py | 17 ++++++++++++++--- sdk/cwl/tests/wf/expect_packed.cwl | 3 +++ sdk/cwl/tests/wf/submit_wf.cwl | 2 ++ 6 files changed, 36 insertions(+), 4 deletions(-) diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py index ab8ad035fd..59760810f3 100644 --- a/sdk/cwl/arvados_cwl/arvworkflow.py +++ b/sdk/cwl/arvados_cwl/arvworkflow.py @@ -11,7 +11,7 @@ from cwltool.pathmapper import adjustFileObjs, adjustDirObjs import ruamel.yaml as yaml -from .runner import upload_docker, upload_dependencies +from .runner import upload_docker, upload_dependencies, del_listing from .arvtool import ArvadosCommandTool logger = logging.getLogger('arvados.cwl-runner') @@ -23,6 +23,8 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid, update_uuid): packed = pack(document_loader, workflowobj, uri, tool.metadata) + adjustDirObjs(job_order, del_listing) + main = [p for p in packed["$graph"] if p["id"] == "#main"][0] for inp in main["inputs"]: sn = shortname(inp["id"]) diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py index cf2f1db997..839239258d 100644 --- a/sdk/cwl/arvados_cwl/runner.py +++ b/sdk/cwl/arvados_cwl/runner.py @@ -23,6 +23,10 @@ logger = logging.getLogger('arvados.cwl-runner') cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r"^[a-zA-Z0-9._+-]+$") +def del_listing(obj): + if obj.get("location", "").startswith("keep:") and "listing" in obj: + del obj["listing"] + def upload_dependencies(arvrunner, name, document_loader, workflowobj, uri, loadref_run): """Upload the dependencies of the workflowobj document to Keep. @@ -129,6 +133,8 @@ class Runner(object): self.job_order.get("id", "#"), False) + adjustDirObjs(self.job_order, del_listing) + if "id" in self.job_order: del self.job_order["id"] diff --git a/sdk/cwl/tests/submit_test_job.json b/sdk/cwl/tests/submit_test_job.json index 95ff0ff981..02d61fa558 100644 --- a/sdk/cwl/tests/submit_test_job.json +++ b/sdk/cwl/tests/submit_test_job.json @@ -2,5 +2,13 @@ "x": { "class": "File", "path": "input/blorp.txt" + }, + "y": { + "class": "Directory", + "location": "keep:99999999999999999999999999999998+99", + "listing": [{ + "class": "File", + "location": "keep:99999999999999999999999999999998+99/file1.txt" + }] } } diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py index 200f95433b..f57ff320b2 100644 --- a/sdk/cwl/tests/test_submit.py +++ b/sdk/cwl/tests/test_submit.py @@ -88,6 +88,11 @@ def stubs(func): 'location': 'keep:99999999999999999999999999999994+99/blorp.txt', 'class': 'File' }, + 'y': { + 'basename': '99999999999999999999999999999998+99', + 'location': 'keep:99999999999999999999999999999998+99', + 'class': 'Directory' + }, 'cwl:tool': '99999999999999999999999999999991+99/wf/submit_wf.cwl' }, @@ -112,7 +117,7 @@ def stubs(func): 'kind': 'file' }, '/var/lib/cwl/job/cwl.input.json': { - 'portable_data_hash': '765fda0d9897729ff467a4609879c00a+60/cwl.input.json', + 'portable_data_hash': '606be75b6e4f811a2f282d7fac867043+60/cwl.input.json', 'kind': 'collection' } }, @@ -155,7 +160,7 @@ class TestSubmit(unittest.TestCase): 'manifest_text': './tool d51232d96b6116d964a69bfb7e0c73bf+450 ' '0:16:blub.txt 16:434:submit_tool.cwl\n./wf ' - '4d31c5fefd087faf67ca8db0111af36c+353 0:353:submit_wf.cwl\n', + '0f8864f292e901019c43fdabacd62c3e+383 0:383:submit_wf.cwl\n', 'owner_uuid': 'zzzzz-tpzed-zzzzzzzzzzzzzzz', 'name': 'submit_wf.cwl', }, ensure_unique_name=True), @@ -215,7 +220,7 @@ class TestSubmit(unittest.TestCase): 'manifest_text': './tool d51232d96b6116d964a69bfb7e0c73bf+450 ' '0:16:blub.txt 16:434:submit_tool.cwl\n./wf ' - '4d31c5fefd087faf67ca8db0111af36c+353 0:353:submit_wf.cwl\n', + '0f8864f292e901019c43fdabacd62c3e+383 0:383:submit_wf.cwl\n', 'owner_uuid': 'zzzzz-tpzed-zzzzzzzzzzzzzzz', 'name': 'submit_wf.cwl', }, ensure_unique_name=True), @@ -267,6 +272,12 @@ class TestCreateTemplate(unittest.TestCase): 'type': 'File', 'value': '99999999999999999999999999999994+99/blorp.txt', } + expect_component['script_parameters']['y'] = { + 'dataclass': 'Collection', + 'required': True, + 'type': 'Directory', + 'value': '99999999999999999999999999999998+99', + } expect_template = { "components": { "submit_wf.cwl": expect_component, diff --git a/sdk/cwl/tests/wf/expect_packed.cwl b/sdk/cwl/tests/wf/expect_packed.cwl index 3545f34abb..f21053732d 100644 --- a/sdk/cwl/tests/wf/expect_packed.cwl +++ b/sdk/cwl/tests/wf/expect_packed.cwl @@ -16,6 +16,9 @@ $graph: - default: {basename: blorp.txt, class: File, location: 'keep:99999999999999999999999999999991+99/input/blorp.txt'} id: '#main/x' type: File + - default: {basename: 99999999999999999999999999999998+99, class: Directory, location: 'keep:99999999999999999999999999999998+99'} + id: '#main/y' + type: Directory outputs: [] steps: - id: '#main/step1' diff --git a/sdk/cwl/tests/wf/submit_wf.cwl b/sdk/cwl/tests/wf/submit_wf.cwl index 874c72c50f..b4446559cc 100644 --- a/sdk/cwl/tests/wf/submit_wf.cwl +++ b/sdk/cwl/tests/wf/submit_wf.cwl @@ -8,6 +8,8 @@ cwlVersion: v1.0 inputs: - id: x type: File + - id: y + type: Directory outputs: [] steps: - id: step1 -- 2.30.2