From: Peter Amstutz Date: Fri, 2 Dec 2016 21:50:06 +0000 (-0500) Subject: 10576: Working on fetch & url join for keep references. X-Git-Tag: 1.1.0~543^2~20 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/9f0854e394baf712f1fbcf3ebd21af215276c0fb 10576: Working on fetch & url join for keep references. --- diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh index ddf93832b3..300333a0b7 100755 --- a/build/run-build-packages.sh +++ b/build/run-build-packages.sh @@ -474,7 +474,8 @@ fpm_build lockfile "" "" python 0.12.2 --epoch 1 # So we build this thing separately. # # Ward, 2016-03-17 -fpm_build schema_salad "" "" python 1.20.20161122192122 --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2" +saladversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep schema-salad== | sed "s/.*==\(.*\)'.*/\1/") +fpm_build schema_salad "" "" python $saladversion --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2" # And schema_salad now depends on ruamel-yaml, which apparently has a braindead setup.py that requires special arguments to build (otherwise, it aborts with 'error: you have to install with "pip install ."'). Sigh. # Ward, 2016-05-26 @@ -485,7 +486,7 @@ fpm_build ruamel.yaml "" "" python 0.12.4 --python-setup-py-arguments "--single- fpm_build cwltest "" "" python 1.0.20160907111242 # And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17 -cwltoolversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep cwltool== | sed "s/.*==\(1\.0\..*\)'.*/\1/") +cwltoolversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep cwltool== | sed "s/.*==\(.*\)'.*/\1/") fpm_build cwltool "" "" python $cwltoolversion # FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. 
Ward, 2016-03-25 diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index cacb7b81a0..c7c186390b 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -21,6 +21,7 @@ import schema_salad import arvados import arvados.config +from arvados.keep import KeepClient from arvados.errors import ApiError from .arvcontainer import ArvadosContainer, RunnerContainer @@ -28,7 +29,7 @@ from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate from. runner import Runner, upload_instance from .arvtool import ArvadosCommandTool from .arvworkflow import ArvadosWorkflow, upload_workflow -from .fsaccess import CollectionFsAccess +from .fsaccess import CollectionFsAccess, CollectionFetcher from .perf import Perf from .pathmapper import FinalOutputPathMapper from ._version import __version__ @@ -50,7 +51,7 @@ class ArvCwlRunner(object): """ - def __init__(self, api_client, work_api=None, keep_client=None, output_name=None, output_tags=None): + def __init__(self, api_client, work_api=None, keep_client=None, output_name=None, output_tags=None, num_retries=4): self.api = api_client self.processes = {} self.lock = threading.Lock() @@ -58,7 +59,7 @@ class ArvCwlRunner(object): self.final_output = None self.final_status = None self.uploaded = {} - self.num_retries = 4 + self.num_retries = num_retries self.uuid = None self.stop_polling = threading.Event() self.poll_api = None @@ -586,7 +587,11 @@ def main(args, stdout, stderr, api_client=None, keep_client=None): try: if api_client is None: api_client=arvados.api('v1', model=OrderedJsonModel()) - runner = ArvCwlRunner(api_client, work_api=arvargs.work_api, keep_client=keep_client, output_name=arvargs.output_name, output_tags=arvargs.output_tags) + if keep_client is None: + keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4) + runner = ArvCwlRunner(api_client, work_api=arvargs.work_api, keep_client=keep_client, + num_retries=4, output_name=arvargs.output_name, + 
class CollectionFetcher(DefaultFetcher):
    """Fetcher that resolves "keep:" references via CollectionFsAccess.

    Any URL that does not use the "keep:" scheme is handed to the
    DefaultFetcher implementation unchanged.
    """

    def __init__(self, cache, session, api_client=None, keep_client=None):
        """Set up the base fetcher and a CollectionFsAccess for Keep reads.

        cache and session are passed straight through to DefaultFetcher;
        api_client and keep_client are passed to CollectionFsAccess.
        """
        super(CollectionFetcher, self).__init__(cache, session)
        self.fsaccess = CollectionFsAccess("", api_client=api_client, keep_client=keep_client)

    def fetch_text(self, url):
        """Return the content behind url, reading "keep:" URLs from Keep."""
        if url.startswith("keep:"):
            # Pass the mode explicitly: cwltool's FsAccess.open() is declared
            # as open(fn, mode).  NOTE(review): confirm that
            # CollectionFsAccess.open() has the same signature.
            with self.fsaccess.open(url, "r") as f:
                return f.read()
        return super(CollectionFetcher, self).fetch_text(url)

    def check_exists(self, url):
        """Return whether url exists, checking "keep:" URLs through fsaccess."""
        if url.startswith("keep:"):
            return self.fsaccess.exists(url)
        return super(CollectionFetcher, self).check_exists(url)

    def urljoin(self, base_url, url):
        """Resolve url relative to base_url.

        - An empty url yields base_url; a url with its own scheme is
          returned as is.
        - When base_url is not a "keep:" URL the join is delegated to
          DefaultFetcher.urljoin().
        - For a "keep:" base, the first path component (the collection
          locator) is always preserved.  A fragment-only reference keeps
          the whole base path, an absolute reference ("/a/b") replaces
          everything after the locator, and any other reference replaces
          the last component of the base path.

        The query component of url, if any, is discarded.

        Raises IOError(errno.EINVAL) when the "keep:" base has no path or
        its first component does not match
        arvados.util.keep_locator_pattern.
        """
        if not url:
            return base_url

        urlsp = urlparse.urlsplit(url)
        if urlsp.scheme:
            return url

        basesp = urlparse.urlsplit(base_url)
        if basesp.scheme != "keep":
            return super(CollectionFetcher, self).urljoin(base_url, url)

        if not basesp.path:
            raise IOError(errno.EINVAL, "Invalid Keep locator", base_url)

        baseparts = basesp.path.split("/")
        pdh = baseparts.pop(0)
        if not arvados.util.keep_locator_pattern.match(pdh):
            raise IOError(errno.EINVAL, "Invalid Keep locator", base_url)

        if not urlsp.path:
            # Fragment-only reference ("#main"): it points into the base
            # document itself, so the base path must be kept intact.
            # ("".split("/") would give [""], which used to drop the file name.)
            urlparts = []
        else:
            urlparts = urlsp.path.split("/")
            if urlsp.path.startswith("/"):
                # Absolute within the collection: discard the base path and
                # the empty leading component, otherwise we emit "pdh//a/b".
                baseparts = []
                urlparts.pop(0)
            elif baseparts:
                # Relative reference: replace the base's last component.
                baseparts.pop()

        path = "/".join([pdh] + baseparts + urlparts)
        return urlparse.urlunsplit(("keep", "", path, "", urlsp.fragment))