From: Tom Clegg Date: Mon, 17 Feb 2020 16:36:58 +0000 (-0500) Subject: Merge branch '16100-mime-types' X-Git-Tag: 2.1.0~295 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/0a415b6c80c3bf39bb753274aae857eadde2f590?hp=3836d53ef13841dad652e3faeb20660576279afd Merge branch '16100-mime-types' fixes #16100 Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..5345f045ff --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,96 @@ +Arvados Code of Conduct +======================= + +The Arvados Project is dedicated to providing a harassment-free experience for +everyone. We do not tolerate harassment of participants in any form. + +This code of conduct applies to all Arvados Project spaces both online and off: +Gitter chat, Redmine issues, wiki, mailing lists, forums, video chats, and any other +Arvados spaces. Anyone who violates this code of conduct may be sanctioned or +expelled from these spaces at the discretion of the Arvados Team. + +Some Arvados Project spaces may have additional rules in place, which will be +made clearly available to participants. Participants are responsible for +knowing and abiding by these rules. + +Harassment includes, but is not limited to: + + - Offensive comments related to gender, gender identity and expression, sexual +orientation, disability, mental illness, neuro(a)typicality, physical +appearance, body size, age, race, or religion. + - Unwelcome comments regarding a person’s lifestyle choices and practices, +including those related to food, health, parenting, drugs, and employment. + - Deliberate misgendering or use of [dead](https://www.quora.com/What-is-deadnaming/answer/Nancy-C-Walker) +or rejected names. + - Gratuitous or off-topic sexual images or behaviour in spaces where they’re not +appropriate. 
+ - Physical contact and simulated physical contact (eg, textual descriptions like +“\*hug\*” or “\*backrub\*”) without consent or after a request to stop. + - Threats of violence. + - Incitement of violence towards any individual, including encouraging a person +to commit suicide or to engage in self-harm. + - Deliberate intimidation. + - Stalking or following. + - Harassing photography or recording, including logging online activity for +harassment purposes. + - Sustained disruption of discussion. + - Unwelcome sexual attention. + - Pattern of inappropriate social contact, such as requesting/assuming +inappropriate levels of intimacy with others + - Continued one-on-one communication after requests to cease. + - Deliberate “outing” of any aspect of a person’s identity without their consent +except as necessary to protect vulnerable people from intentional abuse. + - Publication of non-harassing private communication. + +The Arvados Project prioritizes marginalized people’s safety over privileged +people’s comfort. The Arvados Leadership Team will not act on complaints regarding: + + - ‘Reverse’ -isms, including ‘reverse racism,’ ‘reverse sexism,’ and ‘cisphobia’ + - Reasonable communication of boundaries, such as “leave me alone,” “go away,” or +“I’m not discussing this with you.” + - Communicating in a [tone](http://geekfeminism.wikia.com/wiki/Tone_argument) +you don’t find congenial + +Reporting +--------- + +If you are being harassed by a member of the Arvados Project, notice that someone +else is being harassed, or have any other concerns, please contact the Arvados +Project Team at contact@arvados.org. If the person who is harassing +you is on the team, they will recuse themselves from handling your incident. We +will respond as promptly as we can. + +This code of conduct applies to Arvados Project spaces, but if you are being +harassed by a member of Arvados Project outside our spaces, we still want to +know about it. 
We will take all good-faith reports of harassment by Arvados Project +members, especially the Arvados Team, seriously. This includes harassment +outside our spaces and harassment that took place at any point in time. The +abuse team reserves the right to exclude people from the Arvados Project based on +their past behavior, including behavior outside Arvados Project spaces and +behavior towards people who are not in the Arvados Project. + +In order to protect volunteers from abuse and burnout, we reserve the right to +reject any report we believe to have been made in bad faith. Reports intended +to silence legitimate criticism may be deleted without response. + +We will respect confidentiality requests for the purpose of protecting victims +of abuse. At our discretion, we may publicly name a person about whom we’ve +received harassment complaints, or privately warn third parties about them, if +we believe that doing so will increase the safety of Arvados Project members or +the general public. We will not name harassment victims without their +affirmative consent. + +Consequences +------------ + +Participants asked to stop any harassing behavior are expected to comply +immediately. + +If a participant engages in harassing behavior, the Arvados Team may +take any action they deem appropriate, up to and including expulsion from all +Arvados Project spaces and identification of the participant as a harasser to other +Arvados Project members or the general public. + +This anti-harassment policy is based on the [example policy from the Geek +Feminism wiki](http://geekfeminism.wikia.com/wiki/Community_anti-harassment/Policy), +created by the Geek Feminism community. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..459d7277a5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,75 @@ +[comment]: # (Copyright © The Arvados Authors. All rights reserved.) 
+[comment]: # () +[comment]: # (SPDX-License-Identifier: CC-BY-SA-3.0) + +# Contributing + +Arvados is free software, which means it is free for all to use, learn +from, and improve. We encourage contributions from the community that +improve Arvados for everyone. Some examples of contributions are bug +reports, bug fixes, new features, and scripts or documentation that help +with using, administering, or installing Arvados. We also love to +hear about Arvados success stories. + +Those interested in contributing should begin by joining the [Arvados community +channel](https://gitter.im/arvados/community) and telling us about your interest. + +Contributors should also create an account at https://dev.arvados.org +to be able to create and comment on bug tracker issues. The +Arvados public bug tracker is located at +https://dev.arvados.org/projects/arvados/issues . + +Contributors may also be interested in the [development road map](https://dev.arvados.org/issues/gantt?utf8=%E2%9C%93&set_filter=1&gantt=1&f%5B%5D=project_id&op%5Bproject_id%5D=%3D&v%5Bproject_id%5D%5B%5D=49&f%5B%5D=&zoom=1). + +# Development + +Git repositories for primary development are located at +https://git.arvados.org/ and can also be browsed at +https://dev.arvados.org/projects/arvados/repository . Every push to +the master branch is also mirrored to Github at +https://github.com/arvados/arvados . + +Visit [Hacking Arvados](https://dev.arvados.org/projects/arvados/wiki/Hacking) for +detailed information about setting up an Arvados development +environment, development process, coding standards, and notes about specific components. + +If you wish to build the Arvados documentation from a local git clone, see +[doc/README.textile](doc/README.textile) for instructions. + +# Pull requests + +The preferred method for making contributions is through Github pull requests. + +This is the general contribution process: + +1. Fork the Arvados repository using the Github "Fork" button +2. 
Clone your fork, make your changes, commit to your fork. +3. Every commit message must have a DCO sign-off and every file must have a SPDX license (see below). +4. Add yourself to the [AUTHORS](AUTHORS) file +5. When your fork is ready, through Github, Create a Pull Request against `arvados:master` +6. Notify the core team about your pull request through the [Arvados development +channel](https://gitter.im/arvados/development) or by other means. +7. A member of the core team will review the pull request. They may have questions or comments, or request changes. +8. When the contribution is ready, a member of the core team will +merge the pull request into the master branch, which will +automatically resolve the pull request. + +The Arvados project does not require a contributor agreement in advance, but does require each commit message include a [Developer Certificate of Origin](https://dev.arvados.org/projects/arvados/wiki/Developer_Certificate_Of_Origin). Please ensure *every git commit message* includes `Arvados-DCO-1.1-Signed-off-by`. If you have already made commits without it, fix them with `git commit --amend` or `git rebase`. + +The Developer Certificate of Origin line looks like this: + +``` +Arvados-DCO-1.1-Signed-off-by: Joe Smith +``` + +New files must also include `SPDX-License-Identifier` at the top with one of the three Arvados open source licenses. See [COPYING](COPYING) for details. + +# Continuous integration + +Continuous integration is hosted at https://ci.arvados.org/ + +Currently, external contributors cannot trigger builds. We are investigating integration with Github pull requests for the future. 
+ +[![Build Status](https://ci.arvados.org/buildStatus/icon?job=run-tests)](https://ci.arvados.org/job/run-tests/) + +[![Go Report Card](https://goreportcard.com/badge/github.com/arvados/arvados)](https://goreportcard.com/report/github.com/arvados/arvados) diff --git a/COPYING b/COPYING index 61c31397a0..c549d8a7bb 100644 --- a/COPYING +++ b/COPYING @@ -17,3 +17,7 @@ The full license text for each license is available in this directory: AGPL-3.0: agpl-3.0.txt Apache-2.0: apache-2.0.txt CC-BY-SA-3.0: cc-by-sa-3.0.txt + +As a general rule, code in the sdk/ directory is licensed Apache-2.0, +documentation in the doc/ directory is licensed CC-BY-SA-3.0, and +everything else is licensed AGPL-3.0. \ No newline at end of file diff --git a/README.md b/README.md index 08c102557c..fced2eb5b7 100644 --- a/README.md +++ b/README.md @@ -2,22 +2,47 @@ [comment]: # () [comment]: # (SPDX-License-Identifier: CC-BY-SA-3.0) -[Arvados](https://arvados.org) is a free software distributed computing platform -for bioinformatics, data science, and high throughput analysis of massive data -sets. Arvados supports a variety of cloud, cluster and HPC environments. +[![Join the chat at https://gitter.im/arvados/community](https://badges.gitter.im/arvados/community.svg)](https://gitter.im/arvados/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | [Installing Arvados](https://doc.arvados.org/install/index.html) | [Installing Client SDKs](https://doc.arvados.org/sdk/index.html) | [Report a bug](https://dev.arvados.org/projects/arvados/issues/new) | [Development and Contributing](CONTRIBUTING.md) -Arvados consists of: + -* *Keep*: a petabyte-scale content-addressed distributed storage system for managing and - storing collections of files, accessible via HTTP and FUSE mount. +[Arvados](https://arvados.org) is an open source platform for +managing, processing, and sharing genomic and other large scientific +and biomedical data. 
With Arvados, bioinformaticians run and scale +compute-intensive workflows, developers create biomedical +applications, and IT administrators manage large compute and storage +resources. -* *Crunch*: a Docker-based cluster and HPC workflow engine designed providing - strong versioning, reproducibilty, and provenance of computations. +The key components of Arvados are: -* Related services and components including a web workbench for managing files - and compute jobs, REST APIs, SDKs, and other tools. +* *Keep*: Keep is the Arvados storage system for managing and storing large +collections of files. Keep combines content addressing and a +distributed storage architecture resulting in both high reliability +and high throughput. Every file stored in Keep can be accurately +verified every time it is retrieved. Keep supports the creation of +collections as a flexible way to define data sets without having to +re-organize or needlessly copy data. Keep works on a wide range of +underlying filesystems and object stores. -## Quick start +* *Crunch*: Crunch is the orchestration system for running [Common Workflow Language](https://www.commonwl.org) workflows. It is +designed to maintain data provenance and workflow +reproducibility. Crunch automatically tracks data inputs and outputs +through Keep and executes workflow processes in Docker containers. In +a cloud environment, Crunch optimizes costs by scaling compute on demand. + +* *Workbench*: The Workbench web application allows users to interactively access +Arvados functionality. It is especially helpful for querying and +browsing data, visualizing provenance, and tracking the progress of +workflows. + +* *Command Line tools*: The command line interface (CLI) provides convenient access to Arvados +functionality in the Arvados platform from the command line. + +* *API and SDKs*: Arvados is designed to be integrated with existing infrastructure. All +the services in Arvados are accessed through a RESTful API. 
SDKs are +available for Python, Go, R, Perl, Ruby, and Java. + +# Quick start To try out Arvados on your local workstation, you can use Arvbox, which provides Arvados components pre-installed in a Docker container (requires @@ -32,48 +57,40 @@ In this mode you will only be able to connect to Arvbox from the same host. To configure Arvbox to be accessible over a network and for other options see http://doc.arvados.org/install/arvbox.html for details. -## Documentation +# Documentation -Complete documentation, including a User Guide, Installation documentation and -API documentation is available at http://doc.arvados.org/ +Complete documentation, including the [User Guide](https://doc.arvados.org/user/index.html), [Installation documentation](https://doc.arvados.org/install/index.html), [Administrator documentation](https://doc.arvados.org/admin/index.html) and +[API documentation](https://doc.arvados.org/api/index.html) is available at http://doc.arvados.org/ If you wish to build the Arvados documentation from a local git clone, see -doc/README.textile for instructions. +[doc/README.textile](doc/README.textile) for instructions. -## Community +# Community [![Join the chat at https://gitter.im/arvados/community](https://badges.gitter.im/arvados/community.svg)](https://gitter.im/arvados/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -The [arvados community channel](https://gitter.im/arvados/community) +The [Arvados community channel](https://gitter.im/arvados/community) channel at [gitter.im](https://gitter.im) is available for live discussion and support. -The -[Arvados user mailing list](http://lists.arvados.org/mailman/listinfo/arvados) -is a forum for general discussion, questions, and news about Arvados -development. The -[Arvados developer mailing list](http://lists.arvados.org/mailman/listinfo/arvados-dev) -is a forum for more technical discussion, intended for developers and -contributors to Arvados. 
+The [Arvados development channel](https://gitter.im/arvados/development) +channel at [gitter.im](https://gitter.im) is used to coordinate development. -## Development +The [Arvados user mailing list](http://lists.arvados.org/mailman/listinfo/arvados) +is used to announce new versions and other news. -[![Build Status](https://ci.arvados.org/buildStatus/icon?job=run-tests)](https://ci.arvados.org/job/run-tests/) -[![Go Report Card](https://goreportcard.com/badge/github.com/arvados/arvados)](https://goreportcard.com/report/github.com/arvados/arvados) +All participants are expected to abide by the [Arvados Code of Conduct](CODE_OF_CONDUCT.md). -The Arvados public bug tracker is located at https://dev.arvados.org/projects/arvados/issues +# Reporting bugs -Continuous integration is hosted at https://ci.arvados.org/ +[Report a bug](https://dev.arvados.org/projects/arvados/issues/new) on [dev.arvados.org](https://dev.arvados.org). -Instructions for setting up a development environment and working on specific -components can be found on the -["Hacking Arvados" page of the Arvados wiki](https://dev.arvados.org/projects/arvados/wiki/Hacking). +# Development and Contributing -## Contributing +See [CONTRIBUTING](CONTRIBUTING.md) for information about Arvados development and how to contribute to the Arvados project. -When making a pull request, please ensure *every git commit message* includes a one-line [Developer Certificate of Origin](https://dev.arvados.org/projects/arvados/wiki/Developer_Certificate_Of_Origin). If you have already made commits without it, fix them with `git commit --amend` or `git rebase`. +The [development road map](https://dev.arvados.org/issues/gantt?utf8=%E2%9C%93&set_filter=1&gantt=1&f%5B%5D=project_id&op%5Bproject_id%5D=%3D&v%5Bproject_id%5D%5B%5D=49&f%5B%5D=&zoom=1) outlines some of the project priorities over the next twelve months. -## Licensing +# Licensing -Arvados is Free Software. 
See COPYING for information about Arvados Free -Software licenses. +Arvados is Free Software. See [COPYING](COPYING) for information about the open source licenses used in Arvados. diff --git a/build/package-build-dockerfiles/centos7/Dockerfile b/build/package-build-dockerfiles/centos7/Dockerfile index 9c2660387a..faaf91f43b 100644 --- a/build/package-build-dockerfiles/centos7/Dockerfile +++ b/build/package-build-dockerfiles/centos7/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM centos:7 -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers # Install dependencies. RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel python-devel python-setuptools fuse-devel xz-libs git python-virtualenv wget diff --git a/build/package-build-dockerfiles/ubuntu1604/Dockerfile b/build/package-build-dockerfiles/ubuntu1604/Dockerfile index e046ae1690..6b1304265b 100644 --- a/build/package-build-dockerfiles/ubuntu1604/Dockerfile +++ b/build/package-build-dockerfiles/ubuntu1604/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM ubuntu:xenial -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers ENV DEBIAN_FRONTEND noninteractive diff --git a/build/package-build-dockerfiles/ubuntu1804/Dockerfile b/build/package-build-dockerfiles/ubuntu1804/Dockerfile index c652fe1c08..58bff61603 100644 --- a/build/package-build-dockerfiles/ubuntu1804/Dockerfile +++ b/build/package-build-dockerfiles/ubuntu1804/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM ubuntu:bionic -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers ENV DEBIAN_FRONTEND noninteractive diff --git a/build/package-test-dockerfiles/centos7/Dockerfile b/build/package-test-dockerfiles/centos7/Dockerfile index 
6508c86be0..3d68cfc00b 100644 --- a/build/package-test-dockerfiles/centos7/Dockerfile +++ b/build/package-test-dockerfiles/centos7/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM centos:7 -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers # Install dependencies. RUN yum -q -y install scl-utils centos-release-scl which tar wget diff --git a/build/package-test-dockerfiles/debian10/Dockerfile b/build/package-test-dockerfiles/debian10/Dockerfile index 48e4984d58..32996e4a54 100644 --- a/build/package-test-dockerfiles/debian10/Dockerfile +++ b/build/package-test-dockerfiles/debian10/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM debian:buster -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers ENV DEBIAN_FRONTEND noninteractive diff --git a/build/package-test-dockerfiles/debian9/Dockerfile b/build/package-test-dockerfiles/debian9/Dockerfile index 470845ae97..423a9e7c37 100644 --- a/build/package-test-dockerfiles/debian9/Dockerfile +++ b/build/package-test-dockerfiles/debian9/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM debian:stretch -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers ENV DEBIAN_FRONTEND noninteractive diff --git a/build/package-test-dockerfiles/ubuntu1604/Dockerfile b/build/package-test-dockerfiles/ubuntu1604/Dockerfile index c35deebf10..e0432c20ee 100644 --- a/build/package-test-dockerfiles/ubuntu1604/Dockerfile +++ b/build/package-test-dockerfiles/ubuntu1604/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0 FROM ubuntu:xenial -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers ENV DEBIAN_FRONTEND noninteractive diff --git a/build/package-test-dockerfiles/ubuntu1804/Dockerfile b/build/package-test-dockerfiles/ubuntu1804/Dockerfile index 60296e6be1..2d4189879e 100644 --- a/build/package-test-dockerfiles/ubuntu1804/Dockerfile +++ b/build/package-test-dockerfiles/ubuntu1804/Dockerfile @@ -3,7 +3,7 @@ # 
SPDX-License-Identifier: AGPL-3.0 FROM ubuntu:bionic -MAINTAINER Ward Vandewege +MAINTAINER Arvados Package Maintainers ENV DEBIAN_FRONTEND noninteractive diff --git a/build/run-tests.sh b/build/run-tests.sh index f21861762d..891faca419 100755 --- a/build/run-tests.sh +++ b/build/run-tests.sh @@ -90,6 +90,7 @@ lib/dispatchcloud/container lib/dispatchcloud/scheduler lib/dispatchcloud/ssh_executor lib/dispatchcloud/worker +lib/mount lib/service services/api services/arv-git-httpd @@ -647,8 +648,8 @@ install_env() { . "$VENVDIR/bin/activate" # Needed for run_test_server.py which is used by certain (non-Python) tests. - pip install --no-cache-dir PyYAML future \ - || fatal "pip install PyYAML failed" + pip install --no-cache-dir PyYAML future httplib2 \ + || fatal "`pip install PyYAML future httplib2` failed" # Preinstall libcloud if using a fork; otherwise nodemanager "pip # install" won't pick it up by default. @@ -1099,6 +1100,7 @@ install_deps() { do_install sdk/cli do_install sdk/perl do_install sdk/python pip + do_install sdk/python pip3 do_install sdk/ruby do_install services/api do_install services/arv-git-httpd go diff --git a/cmd/arvados-client/Makefile b/cmd/arvados-client/Makefile new file mode 100644 index 0000000000..b043fc90e2 --- /dev/null +++ b/cmd/arvados-client/Makefile @@ -0,0 +1,11 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +all: + @printf "*** note *** due to an xgo limitation, this only works when the working tree is in GOPATH\n\n" + go mod download + docker build --tag=cgofuse --build-arg=http_proxy="$(http_proxy)" --build-arg=https_proxy="$(https_proxy)" https://github.com/arvados/cgofuse.git + go run github.com/karalabe/xgo --image=cgofuse --targets=linux/amd64,linux/386,darwin/amd64,darwin/386,windows/amd64,windows/386 . 
+ install arvados-* "$(GOPATH)"/bin/ + rm --interactive=never arvados-* diff --git a/cmd/arvados-client/cmd.go b/cmd/arvados-client/cmd.go index bc6c7f0021..887bc62bb3 100644 --- a/cmd/arvados-client/cmd.go +++ b/cmd/arvados-client/cmd.go @@ -9,6 +9,7 @@ import ( "git.arvados.org/arvados.git/lib/cli" "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/lib/mount" ) var ( @@ -50,6 +51,8 @@ var ( "user": cli.APICall, "virtual_machine": cli.APICall, "workflow": cli.APICall, + + "mount": mount.Command, }) ) diff --git a/go.mod b/go.mod index 033723d236..2f18527340 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/Microsoft/go-winio v0.4.5 // indirect github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 // indirect github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect + github.com/arvados/cgofuse v1.2.0 github.com/aws/aws-sdk-go v1.25.30 github.com/coreos/go-oidc v2.1.0+incompatible github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 @@ -30,6 +31,7 @@ require ( github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jmcvetta/randutil v0.0.0-20150817122601-2bb1b664bcff github.com/julienschmidt/httprouter v1.2.0 + github.com/karalabe/xgo v0.0.0-20191115072854-c5ccff8648a7 // indirect github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect github.com/lib/pq v0.0.0-20171126050459-83612a56d3dd github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c // indirect diff --git a/go.sum b/go.sum index d7a022dda9..0a543fde90 100644 --- a/go.sum +++ b/go.sum @@ -17,6 +17,8 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRF github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod 
h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= +github.com/arvados/cgofuse v1.2.0 h1:sWgVxyvSFjH965Uc7ReScn/cBl9Jemc9SeUNlEmjRH4= +github.com/arvados/cgofuse v1.2.0/go.mod h1:79WFV98hrkRHK9XPhh2IGGOwpFSjocsWubgxAs2KhRc= github.com/arvados/goamz v0.0.0-20190905141525-1bba09f407ef h1:cl7DIRbiAYNqaVxg3CZY8qfZoBOKrj06H/x9SPGaxas= github.com/arvados/goamz v0.0.0-20190905141525-1bba09f407ef/go.mod h1:rCtgyMmBGEbjTm37fCuBYbNL0IhztiALzo3OB9HyiOM= github.com/aws/aws-sdk-go v1.25.30 h1:I9qj6zW3mMfsg91e+GMSN/INcaX9tTFvr/l/BAHKaIY= @@ -32,8 +34,6 @@ github.com/coreos/go-oidc v2.1.0+incompatible h1:sdJrfw8akMnCuUlaZU3tE/uYXFgfqom github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 h1:e3u8KWFMR3irlDo1Z/tL8Hsz1MJmCLkSoX5AZRMKZkg= github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/curoverse/goamz v0.0.0-20190905141525-1bba09f407ef h1:k3Q9m06dbTShrR4phl/QNi15ZSPkIwgyQmNvJRcXR3Y= -github.com/curoverse/goamz v0.0.0-20190905141525-1bba09f407ef/go.mod h1:NUkr+hZ9k+l0cEXg9S7EW8+UIfPkP/hNy2Ga0QVPZ88= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -104,6 +104,8 @@ github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/u github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/julienschmidt/httprouter v1.2.0 h1:TDTW5Yz1mjftljbcKqRcrYhd4XeOoI98t+9HbQbYf7g= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/karalabe/xgo v0.0.0-20191115072854-c5ccff8648a7 h1:AYzjK/SHz6m6mg5iuFwkrAhCc14jvCpW9d6frC9iDPE= +github.com/karalabe/xgo 
v0.0.0-20191115072854-c5ccff8648a7/go.mod h1:iYGcTYIPUvEWhFo6aKUuLchs+AV4ssYdyuBbQJZGcBk= github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 h1:xXn0nBttYwok7DhU4RxqaADEpQn7fEMt5kKc3yoj/n0= github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= diff --git a/lib/mount/command.go b/lib/mount/command.go new file mode 100644 index 0000000000..86a9085bda --- /dev/null +++ b/lib/mount/command.go @@ -0,0 +1,86 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +package mount + +import ( + "flag" + "io" + "log" + "net/http" + _ "net/http/pprof" + "os" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" + "git.arvados.org/arvados.git/sdk/go/keepclient" + "github.com/arvados/cgofuse/fuse" +) + +var Command = &cmd{} + +type cmd struct { + // ready, if non-nil, will be closed when the mount is + // initialized. If ready is non-nil, it RunCommand() should + // not be called more than once, or when ready is already + // closed. + ready chan struct{} + // It is safe to call Unmount only after ready has been + // closed. + Unmount func() (ok bool) +} + +// RunCommand implements the subcommand "mount [fuse options]". +// +// The "-d" fuse option (and perhaps other features) ignores the +// stderr argument and prints to os.Stderr instead. 
+func (c *cmd) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + logger := log.New(stderr, prog+" ", 0) + flags := flag.NewFlagSet(prog, flag.ContinueOnError) + ro := flags.Bool("ro", false, "read-only") + experimental := flags.Bool("experimental", false, "acknowledge this is an experimental command, and should not be used in production (required)") + blockCache := flags.Int("block-cache", 4, "read cache size (number of 64MiB blocks)") + pprof := flags.String("pprof", "", "serve Go profile data at `[addr]:port`") + err := flags.Parse(args) + if err != nil { + logger.Print(err) + return 2 + } + if !*experimental { + logger.Printf("error: experimental command %q used without --experimental flag", prog) + return 2 + } + if *pprof != "" { + go func() { + log.Println(http.ListenAndServe(*pprof, nil)) + }() + } + + client := arvados.NewClientFromEnv() + ac, err := arvadosclient.New(client) + if err != nil { + logger.Print(err) + return 1 + } + kc, err := keepclient.MakeKeepClient(ac) + if err != nil { + logger.Print(err) + return 1 + } + kc.BlockCache = &keepclient.BlockCache{MaxBlocks: *blockCache} + host := fuse.NewFileSystemHost(&keepFS{ + Client: client, + KeepClient: kc, + ReadOnly: *ro, + Uid: os.Getuid(), + Gid: os.Getgid(), + ready: c.ready, + }) + c.Unmount = host.Unmount + ok := host.Mount("", flags.Args()) + if !ok { + return 1 + } + return 0 +} diff --git a/lib/mount/command_test.go b/lib/mount/command_test.go new file mode 100644 index 0000000000..980b7d2ae3 --- /dev/null +++ b/lib/mount/command_test.go @@ -0,0 +1,81 @@ +// Copyright (C) The Arvados Authors. All rights reserved. 
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package mount
+
+import (
+	"bytes"
+	"encoding/json"
+	"io/ioutil"
+	"os"
+	"time"
+
+	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CmdSuite{})
+
+type CmdSuite struct {
+	mnt string
+}
+
+func (s *CmdSuite) SetUpTest(c *check.C) {
+	tmpdir, err := ioutil.TempDir("", "")
+	c.Assert(err, check.IsNil)
+	s.mnt = tmpdir
+}
+
+func (s *CmdSuite) TearDownTest(c *check.C) {
+	c.Check(os.RemoveAll(s.mnt), check.IsNil)
+}
+
+func (s *CmdSuite) TestMount(c *check.C) {
+	exited := make(chan int)
+	stdin := bytes.NewBufferString("stdin")
+	stdout := bytes.NewBuffer(nil)
+	stderr := bytes.NewBuffer(nil)
+	mountCmd := cmd{ready: make(chan struct{})}
+	ready := false
+	go func() {
+		exited <- mountCmd.RunCommand("test mount", []string{"--experimental", s.mnt}, stdin, stdout, stderr)
+	}()
+	go func() {
+		<-mountCmd.ready
+		ready = true
+
+		f, err := os.Open(s.mnt + "/by_id/" + arvadostest.FooCollection)
+		if c.Check(err, check.IsNil) {
+			dirnames, err := f.Readdirnames(-1)
+			c.Check(err, check.IsNil)
+			c.Check(dirnames, check.DeepEquals, []string{"foo"})
+			f.Close()
+		}
+
+		buf, err := ioutil.ReadFile(s.mnt + "/by_id/" + arvadostest.FooCollection + "/.arvados#collection")
+		if c.Check(err, check.IsNil) {
+			var m map[string]interface{}
+			err = json.Unmarshal(buf, &m)
+			c.Check(err, check.IsNil)
+			c.Check(m["manifest_text"], check.Matches, `\. acbd.* 0:3:foo\n`)
+		}
+
+		_, err = os.Open(s.mnt + "/by_id/zzzzz-4zz18-does-not-exist")
+		c.Check(os.IsNotExist(err), check.Equals, true)
+
+		ok := mountCmd.Unmount()
+		c.Check(ok, check.Equals, true)
+	}()
+	select {
+	case <-time.After(5 * time.Second):
+		c.Fatal("timed out")
+	case errCode, ok := <-exited:
+		c.Check(ok, check.Equals, true)
+		c.Check(errCode, check.Equals, 0)
+	}
+	c.Check(ready, check.Equals, true)
+	c.Check(stdout.String(), check.Equals, "")
+	// stdin should not have been read
+	c.Check(stdin.String(), check.Equals, "stdin")
+}
diff --git a/lib/mount/fs.go b/lib/mount/fs.go
new file mode 100644
index 0000000000..c008b96af6
--- /dev/null
+++ b/lib/mount/fs.go
@@ -0,0 +1,441 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package mount
+
+import (
+	"io"
+	"log"
+	"os"
+	"runtime/debug"
+	"sync"
+
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/keepclient"
+	"github.com/arvados/cgofuse/fuse"
+)
+
+// sharedFile wraps arvados.File with a sync.Mutex, so fuse can safely
+// use a single filehandle concurrently on behalf of multiple
+// threads/processes.
+type sharedFile struct {
+	arvados.File
+	sync.Mutex
+}
+
+// keepFS implements cgofuse's FileSystemInterface.
+type keepFS struct {
+	fuse.FileSystemBase
+	Client     *arvados.Client
+	KeepClient *keepclient.KeepClient
+	ReadOnly   bool
+	Uid        int
+	Gid        int
+
+	root   arvados.CustomFileSystem
+	open   map[uint64]*sharedFile
+	lastFH uint64
+	sync.RWMutex
+
+	// If non-nil, this channel will be closed by Init() to notify
+	// other goroutines that the mount is ready.
+	ready chan struct{}
+}
+
+var (
+	// invalidFH is returned in place of a handle when a call fails.
+	invalidFH = ^uint64(0)
+)
+
+// newFH wraps f in a sharedFile, adds it to fs's lookup table using a
+// new handle number, and returns the handle number.
+func (fs *keepFS) newFH(f arvados.File) uint64 {
+	fs.Lock()
+	defer fs.Unlock()
+	if fs.open == nil {
+		fs.open = make(map[uint64]*sharedFile)
+	}
+	fs.lastFH++
+	fh := fs.lastFH
+	fs.open[fh] = &sharedFile{File: f}
+	return fh
+}
+
+// lookupFH returns the sharedFile registered under fh, or nil if fh is
+// not in the table of open handles.
+func (fs *keepFS) lookupFH(fh uint64) *sharedFile {
+	fs.RLock()
+	defer fs.RUnlock()
+	return fs.open[fh]
+}
+
+// Init builds the site filesystem, calls MountProject("home", "") on
+// it, and then closes fs.ready (if non-nil) to signal that the mount
+// is ready.
+func (fs *keepFS) Init() {
+	defer fs.debugPanics()
+	fs.root = fs.Client.SiteFileSystem(fs.KeepClient)
+	fs.root.MountProject("home", "")
+	if fs.ready != nil {
+		close(fs.ready)
+	}
+}
+
+// Create opens path with O_CREATE added to flags and returns a new
+// handle for it. On a read-only mount it fails with EROFS.
+func (fs *keepFS) Create(path string, flags int, mode uint32) (errc int, fh uint64) {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS, invalidFH
+	}
+	f, err := fs.root.OpenFile(path, flags|os.O_CREATE, os.FileMode(mode))
+	if err == os.ErrExist {
+		return -fuse.EEXIST, invalidFH
+	} else if err != nil {
+		return -fuse.EINVAL, invalidFH
+	}
+	return 0, fs.newFH(f)
+}
+
+// Open opens path, which must not be a directory, and returns a new
+// handle. Write/create flags fail with EROFS on a read-only mount.
+func (fs *keepFS) Open(path string, flags int) (errc int, fh uint64) {
+	defer fs.debugPanics()
+	if fs.ReadOnly && flags&(os.O_RDWR|os.O_WRONLY|os.O_CREATE) != 0 {
+		return -fuse.EROFS, invalidFH
+	}
+	f, err := fs.root.OpenFile(path, flags, 0)
+	if err != nil {
+		return -fuse.ENOENT, invalidFH
+	} else if fi, err := f.Stat(); err != nil {
+		f.Close()
+		return -fuse.EIO, invalidFH
+	} else if fi.IsDir() {
+		f.Close()
+		return -fuse.EISDIR, invalidFH
+	}
+	return 0, fs.newFH(f)
+}
+
+// Utimens only checks that path can be opened; the timestamps in tmsp
+// are not stored anywhere.
+func (fs *keepFS) Utimens(path string, tmsp []fuse.Timespec) int {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	f, err := fs.root.OpenFile(path, 0, 0)
+	if err != nil {
+		return fs.errCode(err)
+	}
+	f.Close()
+	return 0
+}
+
+// errCode converts err to the value a fuse handler should return: 0
+// for nil, otherwise a negated errno (EIO if err is unrecognized).
+func (fs *keepFS) errCode(err error) int {
+	if os.IsNotExist(err) {
+		return -fuse.ENOENT
+	}
+	switch err {
+	case os.ErrExist:
+		return -fuse.EEXIST
+	case arvados.ErrInvalidArgument:
+		return -fuse.EINVAL
+	case arvados.ErrInvalidOperation:
+		return -fuse.ENOSYS
+	case arvados.ErrDirectoryNotEmpty:
+		return -fuse.ENOTEMPTY
+	case nil:
+		return 0
+	default:
+		return -fuse.EIO
+	}
+}
+
+// Mkdir creates a directory by opening path with O_CREATE|O_EXCL and
+// os.ModeDir set in the mode.
+func (fs *keepFS) Mkdir(path string, mode uint32) int {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	f, err := fs.root.OpenFile(path, os.O_CREATE|os.O_EXCL, os.FileMode(mode)|os.ModeDir)
+	if err != nil {
+		return fs.errCode(err)
+	}
+	f.Close()
+	return 0
+}
+
+// Opendir opens path, which must be a directory, and returns a new
+// handle for it.
+func (fs *keepFS) Opendir(path string) (errc int, fh uint64) {
+	defer fs.debugPanics()
+	f, err := fs.root.OpenFile(path, 0, 0)
+	if err != nil {
+		return fs.errCode(err), invalidFH
+	} else if fi, err := f.Stat(); err != nil {
+		f.Close()
+		return fs.errCode(err), invalidFH
+	} else if !fi.IsDir() {
+		f.Close()
+		return -fuse.ENOTDIR, invalidFH
+	}
+	return 0, fs.newFH(f)
+}
+
+// Releasedir closes a directory handle; it simply calls Release.
+func (fs *keepFS) Releasedir(path string, fh uint64) (errc int) {
+	defer fs.debugPanics()
+	return fs.Release(path, fh)
+}
+
+// Rmdir removes the directory at path. Like the other mutating
+// handlers, it fails with EROFS on a read-only mount.
+func (fs *keepFS) Rmdir(path string) int {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	return fs.errCode(fs.root.Remove(path))
+}
+
+// Release closes the file registered under fh, if any, and removes fh
+// from the table of open handles (even if Close returns an error).
+func (fs *keepFS) Release(path string, fh uint64) (errc int) {
+	defer fs.debugPanics()
+	fs.Lock()
+	defer fs.Unlock()
+	defer delete(fs.open, fh)
+	if f := fs.open[fh]; f != nil {
+		err := f.Close()
+		if err != nil {
+			return -fuse.EIO
+		}
+	}
+	return 0
+}
+
+// Rename renames oldname to newname, unless the mount is read-only.
+func (fs *keepFS) Rename(oldname, newname string) (errc int) {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	return fs.errCode(fs.root.Rename(oldname, newname))
+}
+
+// Unlink removes the entry at path, unless the mount is read-only.
+func (fs *keepFS) Unlink(path string) (errc int) {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	return fs.errCode(fs.root.Remove(path))
+}
+
+// Truncate sets the size of the file identified by fh or, if fh is
+// not an open handle, by path.
+func (fs *keepFS) Truncate(path string, size int64, fh uint64) (errc int) {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+
+	// Sometimes fh is a valid filehandle and we don't need to
+	// waste a name lookup.
+	if f := fs.lookupFH(fh); f != nil {
+		return fs.errCode(f.Truncate(size))
+	}
+
+	// Other times, fh is invalid and we need to lookup path.
+	f, err := fs.root.OpenFile(path, os.O_RDWR, 0)
+	if err != nil {
+		return fs.errCode(err)
+	}
+	defer f.Close()
+	return fs.errCode(f.Truncate(size))
+}
+
+// Getattr fills stat from the open handle fh if it is valid, and from
+// a lookup of path otherwise.
+func (fs *keepFS) Getattr(path string, stat *fuse.Stat_t, fh uint64) (errc int) {
+	defer fs.debugPanics()
+	var fi os.FileInfo
+	var err error
+	if f := fs.lookupFH(fh); f != nil {
+		// Valid filehandle -- ignore path.
+		fi, err = f.Stat()
+	} else {
+		// Invalid filehandle -- lookup path.
+		fi, err = fs.root.Stat(path)
+	}
+	if err != nil {
+		return fs.errCode(err)
+	}
+	fs.fillStat(stat, fi)
+	return 0
+}
+
+// Chmod validates the requested mode against the existing entry but
+// never changes anything.
+func (fs *keepFS) Chmod(path string, mode uint32) (errc int) {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	if fi, err := fs.root.Stat(path); err != nil {
+		return fs.errCode(err)
+	} else if mode & ^uint32(fuse.S_IFREG|fuse.S_IFDIR|0777) != 0 {
+		// Refuse to set mode bits other than
+		// regfile/dir/perms
+		return -fuse.ENOSYS
+	} else if (fi.Mode()&os.ModeDir != 0) != (mode&fuse.S_IFDIR != 0) {
+		// Refuse to transform a regular file to a dir, or
+		// vice versa
+		return -fuse.ENOSYS
+	}
+	// As long as the change isn't nonsense, chmod is a no-op,
+	// because we don't save permission bits.
+	return 0
+}
+
+// fillStat populates stat from fi. All four timestamps are set to
+// fi.ModTime(); Uid and Gid are copied from fs.Uid and fs.Gid only
+// when those are positive and below 1<<31.
+func (fs *keepFS) fillStat(stat *fuse.Stat_t, fi os.FileInfo) {
+	defer fs.debugPanics()
+	var m uint32
+	if fi.IsDir() {
+		m = m | fuse.S_IFDIR
+	} else {
+		m = m | fuse.S_IFREG
+	}
+	m = m | uint32(fi.Mode()&os.ModePerm)
+	stat.Mode = m
+	stat.Nlink = 1
+	stat.Size = fi.Size()
+	t := fuse.NewTimespec(fi.ModTime())
+	stat.Mtim = t
+	stat.Ctim = t
+	stat.Atim = t
+	stat.Birthtim = t
+	stat.Blksize = 1024
+	stat.Blocks = (stat.Size + stat.Blksize - 1) / stat.Blksize
+	if fs.Uid > 0 && int64(fs.Uid) < 1<<31 {
+		stat.Uid = uint32(fs.Uid)
+	}
+	if fs.Gid > 0 && int64(fs.Gid) < 1<<31 {
+		stat.Gid = uint32(fs.Gid)
+	}
+}
+
+// Write writes buf at offset ofst of the file behind fh and returns
+// the number of bytes written, or a negated errno on failure.
+func (fs *keepFS) Write(path string, buf []byte, ofst int64, fh uint64) (n int) {
+	defer fs.debugPanics()
+	if fs.ReadOnly {
+		return -fuse.EROFS
+	}
+	f := fs.lookupFH(fh)
+	if f == nil {
+		return -fuse.EBADF
+	}
+	f.Lock()
+	defer f.Unlock()
+	if _, err := f.Seek(ofst, io.SeekStart); err != nil {
+		return fs.errCode(err)
+	}
+	n, err := f.Write(buf)
+	if err != nil {
+		log.Printf("error writing %q: %s", path, err)
+		return fs.errCode(err)
+	}
+	return n
+}
+
+// Read fills buf from offset ofst of the file behind fh and returns
+// the number of bytes read, or a negated errno on failure.
+func (fs *keepFS) Read(path string, buf []byte, ofst int64, fh uint64) (n int) {
+	defer fs.debugPanics()
+	f := fs.lookupFH(fh)
+	if f == nil {
+		return -fuse.EBADF
+	}
+	f.Lock()
+	defer f.Unlock()
+	if _, err := f.Seek(ofst, io.SeekStart); err != nil {
+		return fs.errCode(err)
+	}
+	n, err := f.Read(buf)
+	for err == nil && n < len(buf) {
+		// f is an io.Reader ("If some data is available but
+		// not len(p) bytes, Read conventionally returns what
+		// is available instead of waiting for more") -- but
+		// our caller requires us to either fill buf or reach
+		// EOF.
+		done := n
+		n, err = f.Read(buf[done:])
+		n += done
+	}
+	if err != nil && err != io.EOF {
+		log.Printf("error reading %q: %s", path, err)
+		return fs.errCode(err)
+	}
+	return n
+}
+
+// Readdir passes ".", "..", and every entry of the directory behind
+// fh to fill. The ofst argument is not used: the whole listing is
+// produced on each call, always with offset 0.
+func (fs *keepFS) Readdir(path string,
+	fill func(name string, stat *fuse.Stat_t, ofst int64) bool,
+	ofst int64,
+	fh uint64) (errc int) {
+	defer fs.debugPanics()
+	f := fs.lookupFH(fh)
+	if f == nil {
+		return -fuse.EBADF
+	}
+	fill(".", nil, 0)
+	fill("..", nil, 0)
+	var stat fuse.Stat_t
+	fis, err := f.Readdir(-1)
+	if err != nil {
+		return fs.errCode(err)
+	}
+	for _, fi := range fis {
+		fs.fillStat(&stat, fi)
+		fill(fi.Name(), &stat, 0)
+	}
+	return 0
+}
+
+// Fsync calls Sync on the file behind fh; datasync is ignored.
+func (fs *keepFS) Fsync(path string, datasync bool, fh uint64) int {
+	defer fs.debugPanics()
+	f := fs.lookupFH(fh)
+	if f == nil {
+		return -fuse.EBADF
+	}
+	return fs.errCode(f.Sync())
+}
+
+// Fsyncdir handles directory handles exactly like Fsync.
+func (fs *keepFS) Fsyncdir(path string, datasync bool, fh uint64) int {
+	return fs.Fsync(path, datasync, fh)
+}
+
+// debugPanics (when deferred by keepFS handlers) prints an error and
+// stack trace on stderr when a handler crashes. (Without this,
+// cgofuse recovers from panics silently and returns EIO.)
+func (fs *keepFS) debugPanics() {
+	if err := recover(); err != nil {
+		log.Printf("(%T) %v", err, err)
+		debug.PrintStack()
+		panic(err)
+	}
+}
diff --git a/lib/mount/fs_test.go b/lib/mount/fs_test.go
new file mode 100644
index 0000000000..fef2c0f069
--- /dev/null
+++ b/lib/mount/fs_test.go
@@ -0,0 +1,49 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+// +// SPDX-License-Identifier: Apache-2.0 + +package mount + +import ( + "testing" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" + "git.arvados.org/arvados.git/sdk/go/keepclient" + "github.com/arvados/cgofuse/fuse" + check "gopkg.in/check.v1" +) + +// Gocheck boilerplate +func Test(t *testing.T) { + check.TestingT(t) +} + +var _ = check.Suite(&FSSuite{}) + +type FSSuite struct{} + +func (*FSSuite) TestFuseInterface(c *check.C) { + var _ fuse.FileSystemInterface = &keepFS{} +} + +func (*FSSuite) TestOpendir(c *check.C) { + client := arvados.NewClientFromEnv() + ac, err := arvadosclient.New(client) + c.Assert(err, check.IsNil) + kc, err := keepclient.MakeKeepClient(ac) + c.Assert(err, check.IsNil) + + var fs fuse.FileSystemInterface = &keepFS{ + Client: client, + KeepClient: kc, + } + fs.Init() + errc, fh := fs.Opendir("/by_id") + c.Check(errc, check.Equals, 0) + c.Check(fh, check.Not(check.Equals), uint64(0)) + c.Check(fh, check.Not(check.Equals), invalidFH) + errc, fh = fs.Opendir("/bogus") + c.Check(errc, check.Equals, -fuse.ENOENT) + c.Check(fh, check.Equals, invalidFH) +} diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py index 62ceab2fa1..d4bb6d102a 100644 --- a/sdk/cwl/setup.py +++ b/sdk/cwl/setup.py @@ -36,7 +36,8 @@ setup(name='arvados-cwl-runner', 'bin/arvados-cwl-runner', ], # Note that arvados/build/run-build-packages.sh looks at this - # file to determine what version of cwltool and schema-salad to build. + # file to determine what version of cwltool and schema-salad to + # build. 
install_requires=[ 'cwltool==1.0.20190831161204', 'schema-salad==4.5.20190815125611', @@ -63,5 +64,5 @@ setup(name='arvados-cwl-runner', 'mock>=1.0,<4', 'subprocess32>=3.5.1', ], - zip_safe=True - ) + zip_safe=True, +) diff --git a/sdk/pam/setup.py b/sdk/pam/setup.py index af00142a04..59b49a19fe 100755 --- a/sdk/pam/setup.py +++ b/sdk/pam/setup.py @@ -53,5 +53,5 @@ setup(name='arvados-pam', ], test_suite='tests', tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'], - zip_safe=False - ) + zip_safe=False, +) diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py index b18ce25fd2..ae687c50bd 100644 --- a/sdk/python/arvados/api.py +++ b/sdk/python/arvados/api.py @@ -237,6 +237,7 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, svc.api_token = token svc.insecure = insecure svc.request_id = request_id + svc.config = lambda: util.get_config_once(svc) kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0) kwargs['http'].cache = None kwargs['http']._request_id = lambda: svc.request_id or util.new_request_id() diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py index fd29a3dc1d..9e0a317830 100644 --- a/sdk/python/arvados/util.py +++ b/sdk/python/arvados/util.py @@ -419,3 +419,11 @@ def new_request_id(): rid += chr(c+ord('a')-10) n = n // 36 return rid + +def get_config_once(svc): + if not svc._rootDesc.get('resources')['configs']: + # Old API server version, no config export endpoint + return {} + if not hasattr(svc, '_cached_config'): + svc._cached_config = svc.configs().get().execute() + return svc._cached_config diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index 0944a31875..3a0316cf9e 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -98,7 +98,7 @@ else: LLFUSE_VERSION_0 = llfuse.__version__.startswith('0') -from .fusedir import sanitize_filename, Directory, CollectionDirectory, 
TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase +from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase from .fusefile import StringFile, FuseArvadosFile _logger = logging.getLogger('arvados.arvados_fuse') diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index 5283367532..7bef8a269f 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -301,7 +301,7 @@ class Mount(object): return e = self.operations.inodes.add_entry(Directory( - llfuse.ROOT_INODE, self.operations.inodes)) + llfuse.ROOT_INODE, self.operations.inodes, self.api.config)) dir_args[0] = e.inode for name in self.args.mount_by_id: diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 3287657441..8b12f73e89 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -33,20 +33,6 @@ _logger = logging.getLogger('arvados.arvados_fuse') # appear as underscores in the fuse mount.) _disallowed_filename_characters = re.compile('[\x00/]') -# '.' and '..' are not reachable if API server is newer than #6277 -def sanitize_filename(dirty): - """Replace disallowed filename characters with harmless "_".""" - if dirty is None: - return None - elif dirty == '': - return '_' - elif dirty == '.': - return '_' - elif dirty == '..': - return '__' - else: - return _disallowed_filename_characters.sub('_', dirty) - class Directory(FreshBase): """Generic directory object, backed by a dict. @@ -55,7 +41,7 @@ class Directory(FreshBase): and the value referencing a File or Directory object. 
""" - def __init__(self, parent_inode, inodes): + def __init__(self, parent_inode, inodes, apiconfig): """parent_inode is the integer inode number""" super(Directory, self).__init__() @@ -65,11 +51,53 @@ class Directory(FreshBase): raise Exception("parent_inode should be an int") self.parent_inode = parent_inode self.inodes = inodes + self.apiconfig = apiconfig self._entries = {} self._mtime = time.time() - # Overriden by subclasses to implement logic to update the entries dict - # when the directory is stale + def forward_slash_subst(self): + if not hasattr(self, '_fsns'): + self._fsns = None + config = self.apiconfig() + try: + self._fsns = config["Collections"]["ForwardSlashNameSubstitution"] + except KeyError: + # old API server with no FSNS config + self._fsns = '_' + else: + if self._fsns == '' or self._fsns == '/': + self._fsns = None + return self._fsns + + def unsanitize_filename(self, incoming): + """Replace ForwardSlashNameSubstitution value with /""" + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + return incoming.replace(fsns, '/') + else: + return incoming + + def sanitize_filename(self, dirty): + """Replace disallowed filename characters according to + ForwardSlashNameSubstitution in self.api_config.""" + # '.' and '..' 
are not reachable if API server is newer than #6277 + if dirty is None: + return None + elif dirty == '': + return '_' + elif dirty == '.': + return '_' + elif dirty == '..': + return '__' + else: + fsns = self.forward_slash_subst() + if isinstance(fsns, str): + dirty = dirty.replace('/', fsns) + return _disallowed_filename_characters.sub('_', dirty) + + + # Overridden by subclasses to implement logic to update the + # entries dict when the directory is stale @use_counter def update(self): pass @@ -138,7 +166,7 @@ class Directory(FreshBase): self._entries = {} changed = False for i in items: - name = sanitize_filename(fn(i)) + name = self.sanitize_filename(fn(i)) if name: if name in oldentries and same(oldentries[name], i): # move existing directory entry over @@ -246,12 +274,13 @@ class CollectionDirectoryBase(Directory): """ - def __init__(self, parent_inode, inodes, collection): - super(CollectionDirectoryBase, self).__init__(parent_inode, inodes) + def __init__(self, parent_inode, inodes, apiconfig, collection): + super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig) + self.apiconfig = apiconfig self.collection = collection def new_entry(self, name, item, mtime): - name = sanitize_filename(name) + name = self.sanitize_filename(name) if hasattr(item, "fuse_entry") and item.fuse_entry is not None: if item.fuse_entry.dead is not True: raise Exception("Can only reparent dead inode entry") @@ -260,7 +289,7 @@ class CollectionDirectoryBase(Directory): item.fuse_entry.dead = False self._entries[name] = item.fuse_entry elif isinstance(item, arvados.collection.RichCollectionBase): - self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item)) + self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, self.apiconfig, item)) self._entries[name].populate(mtime) else: self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime)) @@ -268,7 +297,7 @@ 
class CollectionDirectoryBase(Directory): def on_event(self, event, collection, name, item): if collection == self.collection: - name = sanitize_filename(name) + name = self.sanitize_filename(name) _logger.debug("collection notify %s %s %s %s", event, collection, name, item) with llfuse.lock: if event == arvados.collection.ADD: @@ -357,7 +386,7 @@ class CollectionDirectory(CollectionDirectoryBase): """Represents the root of a directory tree representing a collection.""" def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None): - super(CollectionDirectory, self).__init__(parent_inode, inodes, None) + super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, None) self.api = api self.num_retries = num_retries self.collection_record_file = None @@ -548,7 +577,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase): keep_client=api_client.keep, num_retries=num_retries) super(TmpCollectionDirectory, self).__init__( - parent_inode, inodes, collection) + parent_inode, inodes, api_client.config, collection) self.collection_record_file = None self.populate(self.mtime()) @@ -625,7 +654,7 @@ and the directory will appear if it exists. """.lstrip() def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False): - super(MagicDirectory, self).__init__(parent_inode, inodes) + super(MagicDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.pdh_only = pdh_only @@ -660,6 +689,7 @@ and the directory will appear if it exists. 
e = self.inodes.add_entry(ProjectDirectory( self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0])) else: + import sys e = self.inodes.add_entry(CollectionDirectory( self.inode, self.inodes, self.api, self.num_retries, k)) @@ -696,7 +726,7 @@ class TagsDirectory(Directory): """A special directory that contains as subdirectories all tags visible to the user.""" def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60): - super(TagsDirectory, self).__init__(parent_inode, inodes) + super(TagsDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self._poll = True @@ -753,7 +783,7 @@ class TagDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, tag, poll=False, poll_time=60): - super(TagDirectory, self).__init__(parent_inode, inodes) + super(TagDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.tag = tag @@ -783,7 +813,7 @@ class ProjectDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, project_object, poll=False, poll_time=60): - super(ProjectDirectory, self).__init__(parent_inode, inodes) + super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.project_object = project_object @@ -897,16 +927,25 @@ class ProjectDirectory(Directory): elif self._full_listing or super(ProjectDirectory, self).__contains__(k): return super(ProjectDirectory, self).__getitem__(k) with llfuse.lock_released: + k2 = self.unsanitize_filename(k) + if k2 == k: + namefilter = ["name", "=", k] + else: + namefilter = ["name", "in", [k, k2]] contents = self.api.groups().list(filters=[["owner_uuid", "=", self.project_uuid], ["group_class", "=", "project"], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if not contents: contents = 
self.api.collections().list(filters=[["owner_uuid", "=", self.project_uuid], - ["name", "=", k]], - limit=1).execute(num_retries=self.num_retries)["items"] + namefilter], + limit=2).execute(num_retries=self.num_retries)["items"] if contents: - name = sanitize_filename(self.namefn(contents[0])) + if len(contents) > 1 and contents[1]['name'] == k: + # If "foo/bar" and "foo[SUBST]bar" both exist, use + # "foo[SUBST]bar". + contents = [contents[1]] + name = self.sanitize_filename(self.namefn(contents[0])) if name != k: raise KeyError(k) return self._add_entry(contents[0], name) @@ -995,8 +1034,8 @@ class ProjectDirectory(Directory): new_attrs = properties.get("new_attributes") or {} old_attrs["uuid"] = ev["object_uuid"] new_attrs["uuid"] = ev["object_uuid"] - old_name = sanitize_filename(self.namefn(old_attrs)) - new_name = sanitize_filename(self.namefn(new_attrs)) + old_name = self.sanitize_filename(self.namefn(old_attrs)) + new_name = self.sanitize_filename(self.namefn(new_attrs)) # create events will have a new name, but not an old name # delete events will have an old name, but not a new name @@ -1038,7 +1077,7 @@ class SharedDirectory(Directory): def __init__(self, parent_inode, inodes, api, num_retries, exclude, poll=False, poll_time=60): - super(SharedDirectory, self).__init__(parent_inode, inodes) + super(SharedDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.current_user = api.users().current().execute(num_retries=num_retries) diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py index f539b3f7d0..593d945cff 100644 --- a/services/fuse/tests/test_mount.py +++ b/services/fuse/tests/test_mount.py @@ -20,6 +20,7 @@ import arvados import arvados_fuse as fuse from . 
import run_test_server +from .integration_test import IntegrationTest from .mount_test_base import MountTestBase logger = logging.getLogger('arvados.arv-mount') @@ -1098,8 +1099,9 @@ class MagicDirApiError(FuseMagicTest): llfuse.listdir(os.path.join(self.mounttmp, self.testcollection)) -class FuseUnitTest(unittest.TestCase): +class SanitizeFilenameTest(MountTestBase): def test_sanitize_filename(self): + pdir = fuse.ProjectDirectory(1, {}, self.api, 0, project_object=self.api.users().current().execute()) acceptable = [ "foo.txt", ".foo", @@ -1119,15 +1121,15 @@ class FuseUnitTest(unittest.TestCase): "//", ] for f in acceptable: - self.assertEqual(f, fuse.sanitize_filename(f)) + self.assertEqual(f, pdir.sanitize_filename(f)) for f in unacceptable: - self.assertNotEqual(f, fuse.sanitize_filename(f)) + self.assertNotEqual(f, pdir.sanitize_filename(f)) # The sanitized filename should be the same length, though. - self.assertEqual(len(f), len(fuse.sanitize_filename(f))) + self.assertEqual(len(f), len(pdir.sanitize_filename(f))) # Special cases - self.assertEqual("_", fuse.sanitize_filename("")) - self.assertEqual("_", fuse.sanitize_filename(".")) - self.assertEqual("__", fuse.sanitize_filename("..")) + self.assertEqual("_", pdir.sanitize_filename("")) + self.assertEqual("_", pdir.sanitize_filename(".")) + self.assertEqual("__", pdir.sanitize_filename("..")) class FuseMagicTestPDHOnly(MountTestBase): @@ -1191,3 +1193,63 @@ class FuseMagicTestPDHOnly(MountTestBase): def test_with_default_by_id(self): self.verify_pdh_only(skip_pdh_only=True) + + +class SlashSubstitutionTest(IntegrationTest): + mnt_args = [ + '--read-write', + '--mount-home', 'zzz', + ] + + def setUp(self): + super(SlashSubstitutionTest, self).setUp() + self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings()) + self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute() + 
self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute() + self.fusename = 'foo[SLASH]bar[SLASH]baz' + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_before_listing(self, get_config_once): + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.checkContents() + @staticmethod + def _test_slash_substitution_before_listing(self, tmpdir, fusename): + with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f: + f.write('xxx') + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_after_listing(self, get_config_once): + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.checkContents() + @staticmethod + def _test_slash_substitution_after_listing(self, tmpdir, fusename): + with open(os.path.join(tmpdir, 'foo-bar-baz', 'waz'), 'w') as f: + f.write('xxx') + os.listdir(tmpdir) + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') + + def checkContents(self): + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo) + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcolleasy['uuid']).execute()['manifest_text'], ' f561aaf6') # md5(xxx) + + @IntegrationTest.mount(argv=mnt_args) + @mock.patch('arvados.util.get_config_once') + def test_slash_substitution_conflict(self, get_config_once): + self.testcollconflict = self.api.collections().create(body={"name": self.fusename}).execute() + get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}} + 
self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename) + self.assertRegexpMatches(self.api.collections().get(uuid=self.testcollconflict['uuid']).execute()['manifest_text'], ' acbd18db') # md5(foo) + # foo/bar/baz collection unchanged, because it is masked by foo[SLASH]bar[SLASH]baz + self.assertEqual(self.api.collections().get(uuid=self.testcoll['uuid']).execute()['manifest_text'], '') + @staticmethod + def _test_slash_substitution_conflict(self, tmpdir, fusename): + with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: + f.write('foo') diff --git a/services/keep-balance/main.go b/services/keep-balance/main.go index 6e89df9a55..65bd8d4cf0 100644 --- a/services/keep-balance/main.go +++ b/services/keep-balance/main.go @@ -9,6 +9,7 @@ import ( "flag" "fmt" "io" + "net/http" "os" "git.arvados.org/arvados.git/lib/config" @@ -50,10 +51,17 @@ func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.W options.Dumper = dumper } - // Only pass along the version flag, which gets handled in RunCommand + // Drop our custom args that would be rejected by the generic + // service.Command args = nil + dropFlag := map[string]bool{ + "once": true, + "commit-pulls": true, + "commit-trash": true, + "dump": true, + } flags.Visit(func(f *flag.Flag) { - if f.Name == "version" { + if !dropFlag[f.Name] { args = append(args, "-"+f.Name, f.Value.String()) } }) @@ -75,6 +83,7 @@ func runCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.W } srv := &Server{ + Handler: http.NotFoundHandler(), Cluster: cluster, ArvClient: ac, RunOptions: options, diff --git a/services/keep-balance/main_test.go b/services/keep-balance/main_test.go new file mode 100644 index 0000000000..a6445506e5 --- /dev/null +++ b/services/keep-balance/main_test.go @@ -0,0 +1,84 @@ +// Copyright (C) The Arvados Authors. All rights reserved. 
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+	"bytes"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"time"
+
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&mainSuite{})
+
+type mainSuite struct{}
+
+func (s *mainSuite) TestVersionFlag(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	runCommand("keep-balance", []string{"-version"}, nil, &stdout, &stderr)
+	c.Check(stderr.String(), check.Equals, "")
+	c.Log(stdout.String())
+}
+
+func (s *mainSuite) TestHTTPServer(c *check.C) {
+	// Pick a free port by listening on :0 and closing the listener.
+	ln, err := net.Listen("tcp", ":0")
+	c.Assert(err, check.IsNil)
+	_, p, err := net.SplitHostPort(ln.Addr().String())
+	ln.Close()
+	c.Assert(err, check.IsNil)
+	config := "Clusters:\n zzzzz:\n  ManagementToken: abcdefg\n  Services: {Keepbalance: {InternalURLs: {'http://localhost:" + p + "/': {}}}}\n"
+
+	var stdout bytes.Buffer
+	go runCommand("keep-balance", []string{"-config", "-"}, bytes.NewBufferString(config), &stdout, &stdout)
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for {
+			time.Sleep(time.Second / 10)
+			req, err := http.NewRequest(http.MethodGet, "http://:"+p+"/metrics", nil)
+			if err != nil {
+				c.Error(err)
+				return
+			}
+			req.Header.Set("Authorization", "Bearer abcdefg")
+			resp, err := http.DefaultClient.Do(req)
+			if err != nil {
+				c.Logf("error %s", err)
+				continue
+			}
+			buf, err := ioutil.ReadAll(resp.Body)
+			resp.Body.Close()
+			if resp.StatusCode != http.StatusOK {
+				c.Logf("http status %d", resp.StatusCode)
+				continue
+			}
+			if err != nil {
+				c.Logf("read body: %s", err)
+				continue
+			}
+			c.Check(string(buf), check.Matches, `(?ms).*arvados_keepbalance_sweep_seconds_sum.*`)
+			return
+		}
+	}()
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		c.Log(stdout.String())
+		c.Fatal("timeout")
+	}
+
+	// Check non-metrics URL that gets passed through to us from
+	// service.Command
+	req, err := http.NewRequest(http.MethodGet, "http://:"+p+"/not-metrics", nil)
+	c.Assert(err, check.IsNil)
+	resp, err := http.DefaultClient.Do(req)
+	c.Assert(err, check.IsNil)
+	defer resp.Body.Close()
+	c.Check(resp.StatusCode, check.Equals, http.StatusNotFound)
+}
diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
index a2b9a0ca92..75e8f85fbd 100644
--- a/services/nodemanager/setup.py
+++ b/services/nodemanager/setup.py
@@ -56,5 +56,5 @@ setup(name='arvados-node-manager',
           'apache-libcloud==2.5.0',
           'subprocess32>=3.5.1',
       ],
-      zip_safe=False
-      )
+      zip_safe=False,
+)
diff --git a/tools/crunchstat-summary/setup.py b/tools/crunchstat-summary/setup.py
index 40c5a2f9a3..557b6d3f4e 100755
--- a/tools/crunchstat-summary/setup.py
+++ b/tools/crunchstat-summary/setup.py
@@ -42,5 +42,5 @@ setup(name='crunchstat_summary',
       ],
       test_suite='tests',
       tests_require=['pbr<1.7.0', 'mock>=1.0'],
-      zip_safe=False
-      )
+      zip_safe=False,
+)