From: Tom Clegg Date: Fri, 14 Feb 2020 20:13:15 +0000 (-0500) Subject: Merge branch '12308-cgofuse' X-Git-Tag: 2.1.0~297 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/db791b7a682627e0d3e2f1efc821dc3b0f311942?hp=504e09d413026fcac8ac94530134da2fce4dc0f2 Merge branch '12308-cgofuse' refs #12308 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..5345f045ff --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,96 @@ +Arvados Code of Conduct +======================= + +The Arvados Project is dedicated to providing a harassment-free experience for +everyone. We do not tolerate harassment of participants in any form. + +This code of conduct applies to all Arvados Project spaces both online and off: +Gitter chat, Redmine issues, wiki, mailing lists, forums, video chats, and any other +Arvados spaces. Anyone who violates this code of conduct may be sanctioned or +expelled from these spaces at the discretion of the Arvados Team. + +Some Arvados Project spaces may have additional rules in place, which will be +made clearly available to participants. Participants are responsible for +knowing and abiding by these rules. + +Harassment includes, but is not limited to: + + - Offensive comments related to gender, gender identity and expression, sexual +orientation, disability, mental illness, neuro(a)typicality, physical +appearance, body size, age, race, or religion. + - Unwelcome comments regarding a person’s lifestyle choices and practices, +including those related to food, health, parenting, drugs, and employment. + - Deliberate misgendering or use of [dead](https://www.quora.com/What-is-deadnaming/answer/Nancy-C-Walker) +or rejected names. + - Gratuitous or off-topic sexual images or behaviour in spaces where they’re not +appropriate. 
+ - Physical contact and simulated physical contact (e.g., textual descriptions like +“\*hug\*” or “\*backrub\*”) without consent or after a request to stop. + - Threats of violence. + - Incitement of violence towards any individual, including encouraging a person +to commit suicide or to engage in self-harm. + - Deliberate intimidation. + - Stalking or following. + - Harassing photography or recording, including logging online activity for +harassment purposes. + - Sustained disruption of discussion. + - Unwelcome sexual attention. + - Pattern of inappropriate social contact, such as requesting/assuming +inappropriate levels of intimacy with others + - Continued one-on-one communication after requests to cease. + - Deliberate “outing” of any aspect of a person’s identity without their consent +except as necessary to protect vulnerable people from intentional abuse. + - Publication of non-harassing private communication. + +The Arvados Project prioritizes marginalized people’s safety over privileged +people’s comfort. The Arvados Leadership Team will not act on complaints regarding: + + - ‘Reverse’ -isms, including ‘reverse racism,’ ‘reverse sexism,’ and ‘cisphobia’ + - Reasonable communication of boundaries, such as “leave me alone,” “go away,” or +“I’m not discussing this with you.” + - Communicating in a [tone](http://geekfeminism.wikia.com/wiki/Tone_argument) +you don’t find congenial + +Reporting +--------- + +If you are being harassed by a member of the Arvados Project, notice that someone +else is being harassed, or have any other concerns, please contact the Arvados +Project Team at contact@arvados.org. If the person who is harassing +you is on the team, they will recuse themselves from handling your incident. We +will respond as promptly as we can. + +This code of conduct applies to Arvados Project spaces, but if you are being +harassed by a member of the Arvados Project outside our spaces, we still want to +know about it. 
We will take all good-faith reports of harassment by Arvados Project +members, especially the Arvados Team, seriously. This includes harassment +outside our spaces and harassment that took place at any point in time. The +abuse team reserves the right to exclude people from the Arvados Project based on +their past behavior, including behavior outside Arvados Project spaces and +behavior towards people who are not in the Arvados Project. + +In order to protect volunteers from abuse and burnout, we reserve the right to +reject any report we believe to have been made in bad faith. Reports intended +to silence legitimate criticism may be deleted without response. + +We will respect confidentiality requests for the purpose of protecting victims +of abuse. At our discretion, we may publicly name a person about whom we’ve +received harassment complaints, or privately warn third parties about them, if +we believe that doing so will increase the safety of Arvados Project members or +the general public. We will not name harassment victims without their +affirmative consent. + +Consequences +------------ + +Participants asked to stop any harassing behavior are expected to comply +immediately. + +If a participant engages in harassing behavior, the Arvados Team may +take any action they deem appropriate, up to and including expulsion from all +Arvados Project spaces and identification of the participant as a harasser to other +Arvados Project members or the general public. + +This anti-harassment policy is based on the [example policy from the Geek +Feminism wiki](http://geekfeminism.wikia.com/wiki/Community_anti-harassment/Policy), +created by the Geek Feminism community. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..459d7277a5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,75 @@ +[comment]: # (Copyright © The Arvados Authors. All rights reserved.) 
+[comment]: # () +[comment]: # (SPDX-License-Identifier: CC-BY-SA-3.0) + +# Contributing + +Arvados is free software, which means it is free for all to use, learn +from, and improve. We encourage contributions from the community that +improve Arvados for everyone. Some examples of contributions are bug +reports, bug fixes, new features, and scripts or documentation that help +with using, administering, or installing Arvados. We also love to +hear about Arvados success stories. + +Those interested in contributing should begin by joining the [Arvados community +channel](https://gitter.im/arvados/community) and telling us about your interest. + +Contributors should also create an account at https://dev.arvados.org +to be able to create and comment on bug tracker issues. The +Arvados public bug tracker is located at +https://dev.arvados.org/projects/arvados/issues . + +Contributors may also be interested in the [development road map](https://dev.arvados.org/issues/gantt?utf8=%E2%9C%93&set_filter=1&gantt=1&f%5B%5D=project_id&op%5Bproject_id%5D=%3D&v%5Bproject_id%5D%5B%5D=49&f%5B%5D=&zoom=1). + +# Development + +Git repositories for primary development are located at +https://git.arvados.org/ and can also be browsed at +https://dev.arvados.org/projects/arvados/repository . Every push to +the master branch is also mirrored to Github at +https://github.com/arvados/arvados . + +Visit [Hacking Arvados](https://dev.arvados.org/projects/arvados/wiki/Hacking) for +detailed information about setting up an Arvados development +environment, development process, coding standards, and notes about specific components. + +If you wish to build the Arvados documentation from a local git clone, see +[doc/README.textile](doc/README.textile) for instructions. + +# Pull requests + +The preferred method for making contributions is through Github pull requests. + +This is the general contribution process: + +1. Fork the Arvados repository using the Github "Fork" button +2. 
Clone your fork, make your changes, commit to your fork. +3. Every commit message must have a DCO sign-off and every file must have an SPDX license (see below). +4. Add yourself to the [AUTHORS](AUTHORS) file +5. When your fork is ready, through Github, Create a Pull Request against `arvados:master` +6. Notify the core team about your pull request through the [Arvados development +channel](https://gitter.im/arvados/development) or by other means. +7. A member of the core team will review the pull request. They may have questions or comments, or request changes. +8. When the contribution is ready, a member of the core team will +merge the pull request into the master branch, which will +automatically resolve the pull request. + +The Arvados project does not require a contributor agreement in advance, but does require each commit message include a [Developer Certificate of Origin](https://dev.arvados.org/projects/arvados/wiki/Developer_Certificate_Of_Origin). Please ensure *every git commit message* includes `Arvados-DCO-1.1-Signed-off-by`. If you have already made commits without it, fix them with `git commit --amend` or `git rebase`. + +The Developer Certificate of Origin line looks like this: + +``` +Arvados-DCO-1.1-Signed-off-by: Joe Smith +``` + +New files must also include `SPDX-License-Identifier` at the top with one of the three Arvados open source licenses. See [COPYING](COPYING) for details. + +# Continuous integration + +Continuous integration is hosted at https://ci.arvados.org/ + +Currently, external contributors cannot trigger builds. We are investigating integration with Github pull requests for the future. 
+ +[![Build Status](https://ci.arvados.org/buildStatus/icon?job=run-tests)](https://ci.arvados.org/job/run-tests/) + +[![Go Report Card](https://goreportcard.com/badge/github.com/arvados/arvados)](https://goreportcard.com/report/github.com/arvados/arvados) diff --git a/COPYING b/COPYING index 61c31397a0..c549d8a7bb 100644 --- a/COPYING +++ b/COPYING @@ -17,3 +17,7 @@ The full license text for each license is available in this directory: AGPL-3.0: agpl-3.0.txt Apache-2.0: apache-2.0.txt CC-BY-SA-3.0: cc-by-sa-3.0.txt + +As a general rule, code in the sdk/ directory is licensed Apache-2.0, +documentation in the doc/ directory is licensed CC-BY-SA-3.0, and +everything else is licensed AGPL-3.0. \ No newline at end of file diff --git a/README.md b/README.md index 08c102557c..fced2eb5b7 100644 --- a/README.md +++ b/README.md @@ -2,22 +2,47 @@ [comment]: # () [comment]: # (SPDX-License-Identifier: CC-BY-SA-3.0) -[Arvados](https://arvados.org) is a free software distributed computing platform -for bioinformatics, data science, and high throughput analysis of massive data -sets. Arvados supports a variety of cloud, cluster and HPC environments. +[![Join the chat at https://gitter.im/arvados/community](https://badges.gitter.im/arvados/community.svg)](https://gitter.im/arvados/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [Installing Arvados](https://doc.arvados.org/install/index.html) | [Installing Client SDKs](https://doc.arvados.org/sdk/index.html) | [Report a bug](https://dev.arvados.org/projects/arvados/issues/new) | [Development and Contributing](CONTRIBUTING.md) -Arvados consists of: + -* *Keep*: a petabyte-scale content-addressed distributed storage system for managing and - storing collections of files, accessible via HTTP and FUSE mount. +[Arvados](https://arvados.org) is an open source platform for +managing, processing, and sharing genomic and other large scientific +and biomedical data. 
With Arvados, bioinformaticians run and scale +compute-intensive workflows, developers create biomedical +applications, and IT administrators manage large compute and storage +resources. -* *Crunch*: a Docker-based cluster and HPC workflow engine designed providing - strong versioning, reproducibilty, and provenance of computations. +The key components of Arvados are: -* Related services and components including a web workbench for managing files - and compute jobs, REST APIs, SDKs, and other tools. +* *Keep*: Keep is the Arvados storage system for managing and storing large +collections of files. Keep combines content addressing and a +distributed storage architecture resulting in both high reliability +and high throughput. Every file stored in Keep can be accurately +verified every time it is retrieved. Keep supports the creation of +collections as a flexible way to define data sets without having to +re-organize or needlessly copy data. Keep works on a wide range of +underlying filesystems and object stores. -## Quick start +* *Crunch*: Crunch is the orchestration system for running [Common Workflow Language](https://www.commonwl.org) workflows. It is +designed to maintain data provenance and workflow +reproducibility. Crunch automatically tracks data inputs and outputs +through Keep and executes workflow processes in Docker containers. In +a cloud environment, Crunch optimizes costs by scaling compute on demand. + +* *Workbench*: The Workbench web application allows users to interactively access +Arvados functionality. It is especially helpful for querying and +browsing data, visualizing provenance, and tracking the progress of +workflows. + +* *Command Line tools*: The command line interface (CLI) provides convenient access to Arvados +functionality in the Arvados platform from the command line. + +* *API and SDKs*: Arvados is designed to be integrated with existing infrastructure. All +the services in Arvados are accessed through a RESTful API. 
SDKs are +available for Python, Go, R, Perl, Ruby, and Java. + +# Quick start To try out Arvados on your local workstation, you can use Arvbox, which provides Arvados components pre-installed in a Docker container (requires @@ -32,48 +57,40 @@ In this mode you will only be able to connect to Arvbox from the same host. To configure Arvbox to be accessible over a network and for other options see http://doc.arvados.org/install/arvbox.html for details. -## Documentation +# Documentation -Complete documentation, including a User Guide, Installation documentation and -API documentation is available at http://doc.arvados.org/ +Complete documentation, including the [User Guide](https://doc.arvados.org/user/index.html), [Installation documentation](https://doc.arvados.org/install/index.html), [Administrator documentation](https://doc.arvados.org/admin/index.html) and +[API documentation](https://doc.arvados.org/api/index.html) is available at http://doc.arvados.org/ If you wish to build the Arvados documentation from a local git clone, see -doc/README.textile for instructions. +[doc/README.textile](doc/README.textile) for instructions. -## Community +# Community [![Join the chat at https://gitter.im/arvados/community](https://badges.gitter.im/arvados/community.svg)](https://gitter.im/arvados/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -The [arvados community channel](https://gitter.im/arvados/community) +The [Arvados community channel](https://gitter.im/arvados/community) channel at [gitter.im](https://gitter.im) is available for live discussion and support. -The -[Arvados user mailing list](http://lists.arvados.org/mailman/listinfo/arvados) -is a forum for general discussion, questions, and news about Arvados -development. The -[Arvados developer mailing list](http://lists.arvados.org/mailman/listinfo/arvados-dev) -is a forum for more technical discussion, intended for developers and -contributors to Arvados. 
+The [Arvados development channel](https://gitter.im/arvados/development) +at [gitter.im](https://gitter.im) is used to coordinate development. -## Development +The [Arvados user mailing list](http://lists.arvados.org/mailman/listinfo/arvados) +is used to announce new versions and other news. -[![Build Status](https://ci.arvados.org/buildStatus/icon?job=run-tests)](https://ci.arvados.org/job/run-tests/) -[![Go Report Card](https://goreportcard.com/badge/github.com/arvados/arvados)](https://goreportcard.com/report/github.com/arvados/arvados) +All participants are expected to abide by the [Arvados Code of Conduct](CODE_OF_CONDUCT.md). -The Arvados public bug tracker is located at https://dev.arvados.org/projects/arvados/issues +# Reporting bugs -Continuous integration is hosted at https://ci.arvados.org/ +[Report a bug](https://dev.arvados.org/projects/arvados/issues/new) on [dev.arvados.org](https://dev.arvados.org). -Instructions for setting up a development environment and working on specific -components can be found on the -["Hacking Arvados" page of the Arvados wiki](https://dev.arvados.org/projects/arvados/wiki/Hacking). +# Development and Contributing -## Contributing +See [CONTRIBUTING](CONTRIBUTING.md) for information about Arvados development and how to contribute to the Arvados project. -When making a pull request, please ensure *every git commit message* includes a one-line [Developer Certificate of Origin](https://dev.arvados.org/projects/arvados/wiki/Developer_Certificate_Of_Origin). If you have already made commits without it, fix them with `git commit --amend` or `git rebase`. +The [development road map](https://dev.arvados.org/issues/gantt?utf8=%E2%9C%93&set_filter=1&gantt=1&f%5B%5D=project_id&op%5Bproject_id%5D=%3D&v%5Bproject_id%5D%5B%5D=49&f%5B%5D=&zoom=1) outlines some of the project priorities over the next twelve months. -## Licensing +# Licensing -Arvados is Free Software. 
See COPYING for information about Arvados Free -Software licenses. +Arvados is Free Software. See [COPYING](COPYING) for information about the open source licenses used in Arvados. diff --git a/build/run-tests.sh b/build/run-tests.sh index 8fe51e4794..891faca419 100755 --- a/build/run-tests.sh +++ b/build/run-tests.sh @@ -648,8 +648,8 @@ install_env() { . "$VENVDIR/bin/activate" # Needed for run_test_server.py which is used by certain (non-Python) tests. - pip install --no-cache-dir PyYAML future \ - || fatal "pip install PyYAML failed" + pip install --no-cache-dir PyYAML future httplib2 \ + || fatal "`pip install PyYAML future httplib2` failed" # Preinstall libcloud if using a fork; otherwise nodemanager "pip # install" won't pick it up by default. @@ -1100,6 +1100,7 @@ install_deps() { do_install sdk/cli do_install sdk/perl do_install sdk/python pip + do_install sdk/python pip3 do_install sdk/ruby do_install services/api do_install services/arv-git-httpd go diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index 62ceab2fa1..d4bb6d102a 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -36,7 +36,8 @@ setup(name='arvados-cwl-runner', 'bin/arvados-cwl-runner', ], # Note that arvados/build/run-build-packages.sh looks at this - # file to determine what version of cwltool and schema-salad to build. + # file to determine what version of cwltool and schema-salad to + # build. 
install_requires=[ 'cwltool==1.0.20190831161204', 'schema-salad==4.5.20190815125611', @@ -63,5 +64,5 @@ setup(name='arvados-cwl-runner', 'mock>=1.0,<4', 'subprocess32>=3.5.1', ], - zip_safe=True - ) + zip_safe=True, +) diff --git a/sdk/pam/setup.py b/sdk/pam/setup.py index af00142a04..59b49a19fe 100755 --- a/sdk/pam/setup.py +++ b/sdk/pam/setup.py @@ -53,5 +53,5 @@ setup(name='arvados-pam', ], test_suite='tests', tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'], - zip_safe=False - ) + zip_safe=False, +) diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index b18ce25fd2..ae687c50bd 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -237,6 +237,7 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, svc.api_token = token svc.insecure = insecure svc.request_id = request_id + svc.config = lambda: util.get_config_once(svc) kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0) kwargs['http'].cache = None kwargs['http']._request_id = lambda: svc.request_id or util.new_request_id() diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py index fd29a3dc1d..9e0a317830 100644 --- a/sdk/python/arvados/util.py +++ b/sdk/python/arvados/util.py @@ -419,3 +419,11 @@ def new_request_id(): rid += chr(c+ord('a')-10) n = n // 36 return rid + +def get_config_once(svc): + if not svc._rootDesc.get('resources')['configs']: + # Old API server version, no config export endpoint + return {} + if not hasattr(svc, '_cached_config'): + svc._cached_config = svc.configs().get().execute() + return svc._cached_config diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index 0944a31875..3a0316cf9e 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -98,7 +98,7 @@ else: LLFUSE_VERSION_0 = llfuse.__version__.startswith('0') -from .fusedir import sanitize_filename, Directory, CollectionDirectory, 
TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase +from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase from .fusefile import StringFile, FuseArvadosFile _logger = logging.getLogger('arvados.arvados_fuse') diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index 5283367532..7bef8a269f 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -301,7 +301,7 @@ class Mount(object): return e = self.operations.inodes.add_entry(Directory( - llfuse.ROOT_INODE, self.operations.inodes)) + llfuse.ROOT_INODE, self.operations.inodes, self.api.config)) dir_args[0] = e.inode for name in self.args.mount_by_id: diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 3287657441..8b12f73e89 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -33,20 +33,6 @@ _logger = logging.getLogger('arvados.arvados_fuse') # appear as underscores in the fuse mount.) _disallowed_filename_characters = re.compile('[\x00/]') -# '.' and '..' are not reachable if API server is newer than #6277 -def sanitize_filename(dirty): - """Replace disallowed filename characters with harmless "_".""" - if dirty is None: - return None - elif dirty == '': - return '_' - elif dirty == '.': - return '_' - elif dirty == '..': - return '__' - else: - return _disallowed_filename_characters.sub('_', dirty) - class Directory(FreshBase): """Generic directory object, backed by a dict. @@ -55,7 +41,7 @@ class Directory(FreshBase): and the value referencing a File or Directory object. 
""" - def __init__(self, parent_inode, inodes): + def __init__(self, parent_inode, inodes, apiconfig): """parent_inode is the integer inode number""" super(Directory, self).__init__() @@ -65,11 +51,53 @@ class Directory(FreshBase): raise Exception("parent_inode should be an int") self.parent_inode = parent_inode self.inodes = inodes + self.apiconfig = apiconfig self._entries = {} self._mtime = time.time() - # Overriden by subclasses to implement logic to update the entries dict - # when the directory is stale + def forward_slash_subst(self): + if not hasattr(self, '_fsns'): + self._fsns = None + config = self.apiconfig() + try: + self._fsns = config["Collections"]["ForwardSlashNameSubstitution"] + except KeyError: + # old API server with no FSNS config + self._fsns = '_' + else: + if self._fsns == '' or self._fsns == '/': + self._fsns = None + return self._fsns + + def unsanitize_filename(self, incoming): + """Replace ForwardSlashNameSubstitution value with /""" + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + return incoming.replace(fsns, '/') + else: + return incoming + + def sanitize_filename(self, dirty): + """Replace disallowed filename characters according to + ForwardSlashNameSubstitution in self.api_config.""" + # '.' and '..' 
are not reachable if API server is newer than #6277 + if dirty is None: + return None + elif dirty == '': + return '_' + elif dirty == '.': + return '_' + elif dirty == '..': + return '__' + else: + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + dirty = dirty.replace('/', fsns) + return _disallowed_filename_characters.sub('_', dirty) + + + # Overridden by subclasses to implement logic to update the + # entries dict when the directory is stale @use_counter def update(self): pass @@ -138,7 +166,7 @@ class Directory(FreshBase): self._entries = {} changed = False for i in items: - name = sanitize_filename(fn(i)) + name = self.sanitize_filename(fn(i)) if name: if name in oldentries and same(oldentries[name], i): # move existing directory entry over @@ -246,12 +274,13 @@ class CollectionDirectoryBase(Directory): """ - def __init__(self, parent_inode, inodes, collection): - super(CollectionDirectoryBase, self).__init__(parent_inode, inodes) + def __init__(self, parent_inode, inodes, apiconfig, collection): + super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig) + self.apiconfig = apiconfig self.collection = collection def new_entry(self, name, item, mtime): - name = sanitize_filename(name) + name = self.sanitize_filename(name) if hasattr(item, "fuse_entry") and item.fuse_entry is not None: if item.fuse_entry.dead is not True: raise Exception("Can only reparent dead inode entry") @@ -260,7 +289,7 @@ class CollectionDirectoryBase(Directory): item.fuse_entry.dead = False self._entries[name] = item.fuse_entry elif isinstance(item, arvados.collection.RichCollectionBase): - self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item)) + self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item)) self._entries[name].populate(mtime) else: self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime)) @@ -268,7 +297,7 @@ 
class CollectionDirectoryBase(Directory): def on_event(self, event, collection, name, item): if collection == self.collection: - name = sanitize_filename(name) + name = self.sanitize_filename(name) _logger.debug("collection notify %s %s %s %s", event, collection, name, item) with llfuse.lock: if event == arvados.collection.ADD: @@ -357,7 +386,7 @@ class CollectionDirectory(CollectionDirectoryBase): """Represents the root of a directory tree representing a collection.""" def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None): - super(CollectionDirectory, self).__init__(parent_inode, inodes, None) + super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None) self.api = api self.num_retries = num_retries self.collection_record_file = None @@ -548,7 +577,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase): keep_client=api_client.keep, num_retries=num_retries) super(TmpCollectionDirectory, self).__init__( - parent_inode, inodes, collection) + parent_inode, inodes, api_client.config, collection) self.collection_record_file = None self.populate(self.mtime()) @@ -625,7 +654,7 @@ and the directory will appear if it exists. """.lstrip() def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False): - super(MagicDirectory, self).__init__(parent_inode, inodes) + super(MagicDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.pdh_only = pdh_only @@ -660,6 +689,7 @@ and the directory will appear if it exists. 
e = self.inodes.add_entry(ProjectDirectory( self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0])) else: + import sys e = self.inodes.add_entry(CollectionDirectory( self.inode, self.inodes, self.api, self.num_retries, k)) @@ -696,7 +726,7 @@ class TagsDirectory(Directory): """A special directory that contains as subdirectories all tags visible to the user.""" def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60): - super(TagsDirectory, self).__init__(parent_inode, inodes) + super(TagsDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self._poll = True @@ -753,7 +783,7 @@ class TagDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, tag, poll=False, poll_time=60): - super(TagDirectory, self).__init__(parent_inode, inodes) + super(TagDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.tag = tag @@ -783,7 +813,7 @@ class ProjectDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, project_object, poll=False, poll_time=60): - super(ProjectDirectory, self).__init__(parent_inode, inodes) + super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.project_object = project_object @@ -897,16 +927,25 @@ class ProjectDirectory(Directory): elif self._full_listing or super(ProjectDirectory, self).__contains__(k): return super(ProjectDirectory, self).__getitem__(k) with llfuse.lock_released: + k2 = self.unsanitize_filename(k) + if k2 == k: + namefilter = ["name", "=", k] + else: + namefilter = ["name", "in", [k, k2]] contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid], ["group_class", "=", "project"], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if not contents: contents = 
self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if contents: - name = sanitize_filename(self.namefn(contents[0])) + if len(contents) > 1 and contents[1]['name'] == k: + # If "foo/bar" and "foo[SUBST]bar" both exist, use + # "foo[SUBST]bar". + contents = [contents[1]] + name = self.sanitize_filename(self.namefn(contents[0])) if name != k: raise KeyError(k) return self._add_entry(contents[0], name) @@ -995,8 +1034,8 @@ class ProjectDirectory(Directory): new_attrs = properties.get("new_attributes") or {} old_attrs["uuid"] = ev["object_uuid"] new_attrs["uuid"] = ev["object_uuid"] - old_name = sanitize_filename(self.namefn(old_attrs)) - new_name = sanitize_filename(self.namefn(new_attrs)) + old_name = self.sanitize_filename(self.namefn(old_attrs)) + new_name = self.sanitize_filename(self.namefn(new_attrs)) # create events will have a new name, but not an old name # delete events will have an old name, but not a new name @@ -1038,7 +1077,7 @@ class SharedDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, exclude, poll=False, poll_time=60): - super(SharedDirectory, self).__init__(parent_inode, inodes) + super(SharedDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.current_user = api.users().current().execute(num_retries=num_retries) diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py index f539b3f7d0..593d945cff 100644 --- a/services/fuse/tests/test_mount.py +++ b/services/fuse/tests/test_mount.py @@ -20,6 +20,7 @@ import arvados import arvados_fuse as fuse from . 
import run_test_server +from .integration_test import IntegrationTest from .mount_test_base import MountTestBase logger = logging.getLogger('arvados.arv-mount') @@ -1098,8 +1099,9 @@ class MagicDirApiError(FuseMagicTest): llfuse.listdir(os.path.join(self.mounttmp, self.testcollection)) -class FuseUnitTest(unittest.TestCase): +class SanitizeFilenameTest(MountTestBase): def test_sanitize_filename(self): + pdir = fuse.ProjectDirectory(1, {}, self.api, 0, project_object=self.api.users().current().execute()) acceptable = [ "foo.txt", ".foo", @@ -1119,15 +1121,15 @@ class FuseUnitTest(unittest.TestCase): "//", ] for f in acceptable: - self.assertEqual(f, fuse.sanitize_filename(f)) + self.assertEqual(f, pdir.sanitize_filename(f)) for f in unacceptable: - self.assertNotEqual(f, fuse.sanitize_filename(f)) + self.assertNotEqual(f, pdir.sanitize_filename(f)) # The sanitized filename should be the same length, though. - self.assertEqual(len(f), len(fuse.sanitize_filename(f))) + self.assertEqual(len(f), len(pdir.sanitize_filename(f))) # Special cases - self.assertEqual("_", fuse.sanitize_filename("")) - self.assertEqual("_", fuse.sanitize_filename(".")) - self.assertEqual("__", fuse.sanitize_filename("..")) + self.assertEqual("_", pdir.sanitize_filename("")) + self.assertEqual("_", pdir.sanitize_filename(".")) + self.assertEqual("__", pdir.sanitize_filename("..")) class FuseMagicTestPDHOnly(MountTestBase): @@ -1191,3 +1193,63 @@ class FuseMagicTestPDHOnly(MountTestBase): def test_with_default_by_id(self): self.verify_pdh_only(skip_pdh_only=True) + + +class SlashSubstitutionTest(IntegrationTest): + mnt_args = [ + '--read-write', + '--mount-home', 'zzz', + ] + + def setUp(self): + super(SlashSubstitutionTest, self).setUp() + self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings()) + self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute() + 
self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute() + self.fusename = 'foo[SLASH]bar[SLASH]baz' + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_before_listing(self, get_config_once): + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.checkContents() + @staticmethod + def _test_slash_substitution_before_listing(self, tmpdir, fusename): + with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f: + f.write('xxx') + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_after_listing(self, get_config_once): + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.checkContents() + @staticmethod + def _test_slash_substitution_after_listing(self, tmpdir, fusename): + with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f: + f.write('xxx') + os.listdir(tmpdir) + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') + + def checkContents(self): + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo) + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcolleasy['uuid']).execute()['manifest_text'], ' f561aaf6') # md5(xxx) + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_conflict(self, get_config_once): + self.testcollconflict = self.api.collections().create(body={"name": self.fusename}).execute() + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + 
self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcollconflict['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo) + # foo/bar/baz collection unchanged, because it is masked by foo[SLASH]bar[SLASH]baz + self.assertEqual(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], '') + @staticmethod + def _test_slash_substitution_conflict(self, tmpdir, fusename): + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py index a2b9a0ca92..75e8f85fbd 100644 --- a/services/nodemanager/setup.py +++ b/services/nodemanager/setup.py @@ -56,5 +56,5 @@ setup(name='arvados-node-manager', 'apache-libcloud==2.5.0', 'subprocess32>=3.5.1', ], - zip_safe=False - ) + zip_safe=False, +) diff --git a/tools/crunchstat-summary/setup.py b/tools/crunchstat-summary/setup.py index 40c5a2f9a3..557b6d3f4e 100755 --- a/tools/crunchstat-summary/setup.py +++ b/tools/crunchstat-summary/setup.py @@ -42,5 +42,5 @@ setup(name='crunchstat_summary', ], test_suite='tests', tests_require=['pbr<1.7.0', 'mock>=1.0'], - zip_safe=False - ) + zip_safe=False, +)