From: Ward Vandewege Date: Thu, 1 Jul 2021 13:23:45 +0000 (-0400) Subject: 17778: Merge branch 'master' into 17778-doc-update X-Git-Tag: 2.3.0~166^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/b9c5339d113c63ffc3d8a7c6bf1019616bb3f89a?hp=5b554bbe0b9104e8a34b87d5570cbf87f0308bce 17778: Merge branch 'master' into 17778-doc-update Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- diff --git a/apps/workbench/Gemfile b/apps/workbench/Gemfile index 239c24d950..46c1e8e60c 100644 --- a/apps/workbench/Gemfile +++ b/apps/workbench/Gemfile @@ -18,9 +18,6 @@ gem 'responders', '~> 2.0' # See: https://github.com/rails/sprockets-rails/issues/443 gem 'sprockets', '~> 3.0' -# Fast app boot times -gem 'bootsnap', require: false - # Note: keeping this out of the "group :assets" section "may" allow us # to use Coffescript for UJS responses. It also prevents a # warning/problem when running tests: "WARN: tilt autoloading diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock index fe497fe844..178e5cdfe1 100644 --- a/apps/workbench/Gemfile.lock +++ b/apps/workbench/Gemfile.lock @@ -16,43 +16,43 @@ GEM remote: https://rubygems.org/ specs: RedCloth (4.3.2) - actioncable (5.2.4.5) - actionpack (= 5.2.4.5) + actioncable (5.2.6) + actionpack (= 5.2.6) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailer (5.2.4.5) - actionpack (= 5.2.4.5) - actionview (= 5.2.4.5) - activejob (= 5.2.4.5) + actionmailer (5.2.6) + actionpack (= 5.2.6) + actionview (= 5.2.6) + activejob (= 5.2.6) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (5.2.4.5) - actionview (= 5.2.4.5) - activesupport (= 5.2.4.5) + actionpack (5.2.6) + actionview (= 5.2.6) + activesupport (= 5.2.6) rack (~> 2.0, >= 2.0.8) rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.2) - actionview (5.2.4.5) - activesupport (= 5.2.4.5) + actionview (5.2.6) + activesupport (= 5.2.6) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer 
(~> 1.0, >= 1.0.3) - activejob (5.2.4.5) - activesupport (= 5.2.4.5) + activejob (5.2.6) + activesupport (= 5.2.6) globalid (>= 0.3.6) - activemodel (5.2.4.5) - activesupport (= 5.2.4.5) - activerecord (5.2.4.5) - activemodel (= 5.2.4.5) - activesupport (= 5.2.4.5) + activemodel (5.2.6) + activesupport (= 5.2.6) + activerecord (5.2.6) + activemodel (= 5.2.6) + activesupport (= 5.2.6) arel (>= 9.0) - activestorage (5.2.4.5) - actionpack (= 5.2.4.5) - activerecord (= 5.2.4.5) - marcel (~> 0.3.1) - activesupport (5.2.4.5) + activestorage (5.2.6) + actionpack (= 5.2.6) + activerecord (= 5.2.6) + marcel (~> 1.0.0) + activesupport (5.2.6) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 0.7, < 2) minitest (~> 5.1) @@ -87,8 +87,6 @@ GEM multi_json (>= 1.0.0) autoprefixer-rails (9.5.1.1) execjs - bootsnap (1.4.7) - msgpack (~> 1.0) bootstrap-sass (3.4.1) autoprefixer-rails (>= 5.2.1) sassc (>= 2.0.0) @@ -121,7 +119,7 @@ GEM execjs coffee-script-source (1.12.2) commonjs (0.2.7) - concurrent-ruby (1.1.8) + concurrent-ruby (1.1.9) crass (1.0.6) deep_merge (1.2.1) docile (1.3.1) @@ -167,29 +165,25 @@ GEM railties (>= 4) request_store (~> 1.0) logstash-event (1.2.02) - loofah (2.9.0) + loofah (2.10.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.1) mini_mime (>= 0.1.1) - marcel (0.3.3) - mimemagic (~> 0.3.2) + marcel (1.0.1) memoist (0.16.2) metaclass (0.0.4) method_source (1.0.0) mime-types (3.2.2) mime-types-data (~> 3.2015) mime-types-data (3.2019.0331) - mimemagic (0.3.8) - nokogiri (~> 1) - mini_mime (1.0.2) - mini_portile2 (2.5.1) + mini_mime (1.1.0) + mini_portile2 (2.5.3) minitest (5.10.3) mocha (1.8.0) metaclass (~> 0.0.1) morrisjs-rails (0.5.1.2) railties (> 3.1, < 6) - msgpack (1.3.3) multi_json (1.15.0) multipart-post (2.1.1) net-scp (2.0.0) @@ -200,7 +194,7 @@ GEM net-ssh-gateway (2.0.0) net-ssh (>= 4.0.0) nio4r (2.5.7) - nokogiri (1.11.5) + nokogiri (1.11.7) mini_portile2 (~> 2.5.0) racc (~> 1.4) npm-rails (0.2.1) @@ -226,18 +220,18 @@ GEM rack (>= 1.2.0) rack-test 
(1.1.0) rack (>= 1.0, < 3) - rails (5.2.4.5) - actioncable (= 5.2.4.5) - actionmailer (= 5.2.4.5) - actionpack (= 5.2.4.5) - actionview (= 5.2.4.5) - activejob (= 5.2.4.5) - activemodel (= 5.2.4.5) - activerecord (= 5.2.4.5) - activestorage (= 5.2.4.5) - activesupport (= 5.2.4.5) + rails (5.2.6) + actioncable (= 5.2.6) + actionmailer (= 5.2.6) + actionpack (= 5.2.6) + actionview (= 5.2.6) + activejob (= 5.2.6) + activemodel (= 5.2.6) + activerecord (= 5.2.6) + activestorage (= 5.2.6) + activesupport (= 5.2.6) bundler (>= 1.3.0) - railties (= 5.2.4.5) + railties (= 5.2.6) sprockets-rails (>= 2.0.0) rails-controller-testing (1.0.4) actionpack (>= 5.0.1.x) @@ -249,9 +243,9 @@ GEM rails-html-sanitizer (1.3.0) loofah (~> 2.3) rails-perftest (0.0.7) - railties (5.2.4.5) - actionpack (= 5.2.4.5) - activesupport (= 5.2.4.5) + railties (5.2.6) + actionpack (= 5.2.6) + activesupport (= 5.2.6) method_source rake (>= 0.8.7) thor (>= 0.19.0, < 2.0) @@ -321,7 +315,7 @@ GEM uglifier (2.7.2) execjs (>= 0.3.0) json (>= 1.8.0) - websocket-driver (0.7.3) + websocket-driver (0.7.4) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) xpath (2.1.0) @@ -336,7 +330,6 @@ DEPENDENCIES andand angularjs-rails (~> 1.3.8) arvados (~> 2.1.5) - bootsnap bootstrap-sass (~> 3.4.1) bootstrap-tab-history-rails bootstrap-x-editable-rails @@ -386,4 +379,4 @@ DEPENDENCIES uglifier (~> 2.0) BUNDLED WITH - 1.17.3 + 2.2.19 diff --git a/apps/workbench/config/boot.rb b/apps/workbench/config/boot.rb index 6add5911f6..8153266683 100644 --- a/apps/workbench/config/boot.rb +++ b/apps/workbench/config/boot.rb @@ -8,7 +8,6 @@ require 'rubygems' ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__) require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE']) -require 'bootsnap/setup' # Speed up boot time by caching expensive operations. 
# Use ARVADOS_API_TOKEN environment variable (if set) in console require 'rails' diff --git a/apps/workbench/test/integration/logins_test.rb b/apps/workbench/test/integration/logins_test.rb index f079fbb8f1..3c6ab7c743 100644 --- a/apps/workbench/test/integration/logins_test.rb +++ b/apps/workbench/test/integration/logins_test.rb @@ -17,10 +17,7 @@ class LoginsTest < ActionDispatch::IntegrationTest test "trying to use expired token redirects to login page" do visit page_with_token('expired_trustedclient') - buttons = all("a.btn", text: /Log in/) + buttons = all("button.btn", text: /Log in/) assert_equal(1, buttons.size, "Failed to find one login button") - login_link = buttons.first[:href] - assert_match(%r{//[^/]+/login}, login_link) - assert_no_match(/\bapi_token=/, login_link) end end diff --git a/cmd/arvados-client/cmd.go b/cmd/arvados-client/cmd.go index aefcce79a4..cb15462119 100644 --- a/cmd/arvados-client/cmd.go +++ b/cmd/arvados-client/cmd.go @@ -11,6 +11,7 @@ import ( "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/lib/costanalyzer" "git.arvados.org/arvados.git/lib/deduplicationreport" + "git.arvados.org/arvados.git/lib/diagnostics" "git.arvados.org/arvados.git/lib/mount" ) @@ -59,6 +60,7 @@ var ( "costanalyzer": costanalyzer.Command, "shell": shellCommand{}, "connect-ssh": connectSSHCommand{}, + "diagnostics": diagnostics.Command{}, }) ) diff --git a/doc/_config.yml b/doc/_config.yml index 55987c062f..06d1595213 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -113,7 +113,6 @@ navbar: - api/requests.html.textile.liquid - api/methods.html.textile.liquid - api/resources.html.textile.liquid - - api/tokens_sso.html.textile.liquid - Permission and authentication: - api/methods/api_client_authorizations.html.textile.liquid - api/methods/api_clients.html.textile.liquid diff --git a/doc/admin/restricting-upload-download.html.textile.liquid b/doc/admin/restricting-upload-download.html.textile.liquid new file mode 100644 index 
0000000000..ea10752342 --- /dev/null +++ b/doc/admin/restricting-upload-download.html.textile.liquid @@ -0,0 +1,148 @@ +--- +layout: default +navsection: admin +title: Restricting upload or download +... + +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + +For some use cases, you may want to limit the ability of users to upload or download data from outside the cluster. (By "outside" we mean from networks other than the cluster's own private network). For example, this makes it possible to share restricted data sets with users so that they may run their own data analysis on the cluster, while preventing them from easily downloading the data set to their local workstation. + +This feature exists in addition to the existing Arvados permission system. Users can only download from collections they have @read@ access to, and can only upload to projects and collections they have @write@ access to. + +There are two services involved in accessing data from outside the cluster. + +h2. Keepproxy Permissions + +Permitting @keeproxy@ makes it possible to use @arv-put@ and @arv-get@, and upload from Workbench 1. It works in terms of individual 64 MiB keep blocks. It prints a log each time a user uploads or downloads an individual block. + +The default policy allows anyone to upload or download. + +
+    Collections:
+      KeepproxyPermission:
+        User:
+          Download: true
+          Upload: true
+        Admin:
+          Download: true
+          Upload: true
+
+ +If you create a sharing link as an admin user, and then give someone the token from the sharing link to download a file using @arv-get@, because the downloader is anonymous, the download permission will be restricted based on the "User" role and not the "Admin" role. + +h2. WebDAV and S3 API Permissions + +Permitting @WebDAV@ makes it possible to use WebDAV, S3 API, download from Workbench 1, and upload/download with Workbench 2. It works in terms of individual files. It prints a log each time a user uploads or downloads a file. When @WebDAVLogEvents@ (default true) is enabled, it also adds an entry into the API server @logs@ table. + +When a user attempts to upload or download from a service without permission, they will receive a @403 Forbidden@ response. This only applies to file content. + +Denying download permission does not deny access to XML file listings with PROPFIND, or auto-generated HTML documents containing file listings. + +Denying upload permission does not deny other operations that modify collections without directly accessing file content, such as MOVE and COPY. + +The default policy allows anyone to upload or download. + +<pre>
+    Collections:
+      WebDAVPermission:
+        User:
+          Download: true
+          Upload: true
+        Admin:
+          Download: true
+          Upload: true
+      WebDAVLogEvents: true
+
+ +If you create a sharing link as an admin user, and then give someone the token from the sharing link to download a file over HTTP (WebDAV or S3 API), because the downloader is anonymous, the download permission will be restricted based on the "User" role and not the "Admin" role. + +h2. Shell node and container permissions + +Be aware that even when upload and download from outside the network is not allowed, a user who has access to a shell node or runs a container still has internal access to Keep. (This is necessary to be able to run workflows). From the shell node or container, a user could send data outside the network by some other method, although this requires more intent than accidentally clicking on a link and downloading a file. It is possible to set up a firewall to prevent shell and compute nodes from making connections to hosts outside the private network. Exactly how to configure firewalls is out of scope for this page, as it depends on the specific network infrastructure of your cluster. + +h2. Choosing a policy + +This distinction between WebDAV and Keepproxy is important for auditing. WebDAV records 'upload' and 'download' events on the API server that are included in the "User Activity Report":user-activity.html , whereas @keepproxy@ only logs upload and download of individual blocks, which require a reverse lookup to determine the collection(s) and file(s) a block is associated with. + +You set separate permissions for @WebDAV@ and @Keepproxy@, with separate policies for regular users and admin users. + +These policies apply only to access from outside the cluster, using Workbench or Arvados CLI tools. + +The @WebDAVLogEvents@ option should be enabled if you intend to run the "User Activity Report":user-activity.html . If you don't need audits, or you are running a site that is mostly serving public data to anonymous downloaders, you can disable it to avoid the extra API server request. + +h3. 
Audited downloads + +For ease of access auditing, this policy prevents downloads using @arv-get@. Downloads through WebDAV and S3 API are permitted, but logged. Uploads are allowed. + +
+    Collections:
+      WebDAVPermission:
+        User:
+          Download: true
+          Upload: true
+        Admin:
+          Download: true
+          Upload: true
+
+      KeepproxyPermission:
+        User:
+          Download: false
+          Upload: true
+        Admin:
+          Download: false
+          Upload: true
+      WebDAVLogEvents: true
+
+ +h3. Disallow downloads by regular users + +This policy prevents regular users (non-admin) from downloading data. Uploading is allowed. This supports the case where restricted data sets are shared with users so that they may run their own data analysis on the cluster, while preventing them from downloading the data set to their local workstation. Be aware that users won't be able to download the results of their analysis, either, requiring an admin in the loop or some other process to release results. + +
+    Collections:
+      WebDAVPermission:
+        User:
+          Download: false
+          Upload: true
+        Admin:
+          Download: true
+          Upload: true
+
+      KeepproxyPermission:
+        User:
+          Download: false
+          Upload: true
+        Admin:
+          Download: true
+          Upload: true
+      WebDAVLogEvents: true
+
+ +h3. Disallow uploads by regular users + +This policy is suitable for an installation where data is being shared with a group of users who are allowed to download the data, but not permitted to store their own data on the cluster. + +
+    Collections:
+      WebDAVPermission:
+        User:
+          Download: true
+          Upload: false
+        Admin:
+          Download: true
+          Upload: true
+
+      KeepproxyPermission:
+        User:
+          Download: true
+          Upload: false
+        Admin:
+          Download: true
+          Upload: true
+      WebDAVLogEvents: true
+
diff --git a/doc/api/tokens_sso.html.textile.liquid b/doc/api/tokens_sso.html.textile.liquid deleted file mode 100644 index 5d5246573b..0000000000 --- a/doc/api/tokens_sso.html.textile.liquid +++ /dev/null @@ -1,78 +0,0 @@ ---- -layout: default -navsection: api -title: API Authorization with SSO (deprecated) -... -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{% include 'notebox_begin_warning' %} -This page describes the deprecated login flow via the SSO server. SSO server authentication will be removed in a future Arvados release and should not be used for new installations. See "Set up web based login":{{site.baseurl}}/install/setup-login.html for more information. -{% include 'notebox_end' %} - -All requests to the API server must have an API token. API tokens can be issued by going though the login flow, or created via the API. At this time, only browser based applications can perform login from email/password. Command line applications and services must use an API token provided via the @ARVADOS_API_TOKEN@ environment variable or configuration file. - -h2. Browser login - -Browser based applications can perform log in via the following highlevel flow: - -# The web application presents a "login" link to @/login@ on the API server with a @return_to@ parameter provided in the query portion of the URL. For example @https://{{ site.arvados_api_host }}/login?return_to=XXX@ , where @return_to=XXX@ is the URL of the login page for the web application. -# The "login" link takes the browser to the login page (this may involve several redirects) -# The user logs in. API server authenticates the user and issues a new API token. -# The browser is redirected to the login page URL provided in @return_to=XXX@ with the addition of @?api_token=xxxxapitokenxxxx@. -# The web application gets the login request with the included authorization token. 
- -!{{site.baseurl}}/images/Session_Establishment_with_SSO.svg! - -The "browser authentication process is documented in detail on the Arvados wiki.":https://dev.arvados.org/projects/arvados/wiki/Workbench_authentication_process - -h2. User activation - -"Creation and activation of new users is described here.":{{site.baseurl}}/admin/user-management.html - -h2. Creating tokens via the API - -The browser login method above issues a new token. Using that token, it is possible to make API calls to create additional tokens. To do so, use the @create@ method of the "API client authorizations":{{site.baseurl}}/api/methods/api_client_authorizations.html resource. - -h2. Trusted API clients - -The "api_clients":{{site.baseurl}}/api/methods/api_clients.html resource determines if web applications that have gone through the browser login flow may create or list API tokens. - -After the user has authenticated, but before an authorization token is issued and browser redirect sent (sending the browser back to the @return_to@ login page bearing @api_token@), the server strips the path and query portion from @return_to@ to get @url_prefix@. The @url_prefix@ is used to find or create an ApiClient object. The newly issued API client authorization (API token) is associated with this ApiClient object. - -API clients may be marked as "trusted" by making an API call to create or update an "api_clients":{{site.baseurl}}/api/methods/api_clients.html resource and set the @is_trusted@ flag to @true@. An authorization token associated with a "trusted" client is permitted to list authorization tokens on "API client authorizations":{{site.baseurl}}/api/methods/api_client_authorizations.html . - -A authorization token which is not associated with a trusted client may only use the @current@ method to query its own api_client_authorization object. 
The "untrusted" token is forbidden performing any other operations on API client authorizations, such as listing other authorizations or creating new authorizations. - -Authorization tokens which are not issued via the browser login flow (created directly via the API) inherit the api client of the token used to create them. They will always be "trusted" because untrusted API clients cannot create tokens. - -h2(#scopes). Scopes - -Scopes can restrict a token so it may only access certain resources. This is in addition to normal permission checks for the user associated with the token. - -Each entry in scopes consists of a @request_method@ and @request_path@. The @request_method@ is a HTTP method (one of @GET@, @POST@, @PATCH@ or @DELETE@) and @request_path@ is the request URI. A given request is permitted if it matches a scopes exactly, or the scope ends with @/@ and the request string is a prefix of the scope. - -As a special case, a scope of @["all"]@ allows all resources. This is the default if no scope is given. - -Using scopes is also described on the "Securing API access with scoped tokens":{{site.baseurl}}/admin/scoped-tokens.html page of the admin documentation. - -h3. Scope examples - -A scope of @GET /arvados/v1/collections@ permits listing collections. - -* Requests with different methods, such as creating a new collection using @POST /arvados/v1/collections@, will be rejected. -* Requests to access other resources, such as @GET /arvados/v1/groups@, will be rejected. -* Be aware that requests for specific records, such as @GET /arvados/v1/collections/962eh-4zz18-xi32mpz2621o8km@ will also be rejected. This is because the scope @GET /arvados/v1/collections@ does not end in @/@ - -A scope of @GET /arvados/v1/collections/@ (with @/@ suffix) will permit access to individual collections. 
- -* The request @GET /arvados/v1/collections/962eh-4zz18-xi32mpz2621o8km@ will succeed -* Be aware that requests for listing @GET /arvados/v1/collections@ (no @/@ suffix) will be rejected, because it is not a match with the rule @GET /arvados/v1/collections/@ -* A listing request @GET /arvados/v1/collections/@ will have the trailing @/@ suffix trimmed before the scope check, as a result it will not match the rule @GET /arvados/v1/collections/@. - -To allow both listing objects and requesting individual objects, include both in the scope: @["GET /arvados/v1/collections", "GET /arvados/v1/collections/"]@ - -A narrow scope such as @GET /arvados/v1/collections/962eh-4zz18-xi32mpz2621o8km@ will disallow listing objects as well as disallow requesting any object other than those listed in the scope. diff --git a/doc/install/salt-multi-host.html.textile.liquid b/doc/install/salt-multi-host.html.textile.liquid index ed57807c72..89dfc1717d 100644 --- a/doc/install/salt-multi-host.html.textile.liquid +++ b/doc/install/salt-multi-host.html.textile.liquid @@ -9,6 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} +# "Introduction":#introduction # "Hosts preparation":#hosts_preparation ## "Hosts setup using terraform (experimental)":#hosts_setup_using_terraform ## "Create a compute image":#create_a_compute_image @@ -21,6 +22,18 @@ SPDX-License-Identifier: CC-BY-SA-3.0 # "Initial user and login":#initial_user # "Test the installed cluster running a simple workflow":#test_install + + +h2(#introduction). Introduction + +Arvados components can be installed in a distributed infrastructure, whether it is an "on-prem" with physical or virtual hosts, or a cloud environment. + +As infrastructures vary a great deal from site to site, these instructions should be considered more as 'guidelines' than fixed steps to follow. 
+ +We provide an "installer script":salt.html that can help you deploy the different Arvados components. At the time of writing, the provided examples are suitable to install Arvados on AWS. + + + h2(#hosts_preparation). Hosts preparation In order to run Arvados on a multi-host installation, there are a few requirements that your infrastructure has to fulfill. @@ -50,10 +63,15 @@ We suggest distributing the Arvados components in the following way, creating at Note that these hosts can be virtual machines in your infrastructure and they don't need to be physical machines. -h3(#hosts_setup_using_terraform). Hosts setup using terraform (experimental) +Again, if your infrastructure differs from the setup proposed above (ie, using RDS or an existing DB server), remember that you will need to edit the configuration files for the scripts so they work with your infrastructure. + + +h3(#hosts_setup_using_terraform). Hosts setup using terraform (AWS, experimental) + +We added a few "terraform":https://terraform.io/ scripts (https://github.com/arvados/arvados/tree/master/tools/terraform) to let you create these instances easier in an AWS account. Check "the Arvados terraform documentation":/doc/install/terraform.html for more details. + + -We added a few "terraform":https://terraform.io/ scripts (https://github.com/arvados/arvados/tree/master/tools/terraform) to let you create these instances easier. -Check "the Arvados terraform documentation":/doc/install/terraform.html for more details. h2(#multi_host). Multi host install using the provision.sh script @@ -63,7 +81,7 @@ h2(#multi_host). Multi host install using the provision.sh script {% assign branchname = 'master' %} {% endif %} -This is a package-based installation method. Start with the @provision.sh@ script which is available by cloning the @{{ branchname }}@ branch from "https://git.arvados.org/arvados.git":https://git.arvados.org/arvados.git . 
The @provision.sh@ script and its supporting files can be found in the "arvados/tools/salt-install":https://git.arvados.org/arvados.git/tree/refs/heads/{{ branchname }}:/tools/salt-install directory in the Arvados git repository. +This is a package-based installation method. Start with the @provision.sh@ script which is available by cloning the @{{ branchname }}@ branch from "https://git.arvados.org/arvados.git":https://git.arvados.org/arvados.git . The @provision.sh@ script and its supporting files can be found in the "arvados/tools/salt-install":https://git.arvados.org/arvados.git/tree/refs/heads/{{ branchname }}:/tools/salt-install directory in the Arvados git repository. This procedure will install all the main Arvados components to get you up and running in a multi-host environment. @@ -73,7 +91,7 @@ After setting up a few variables in a config file (next step), you'll be ready t h3(#create_a_compute_image). Create a compute image -In a multi-host installation, containers are dispatched in docker daemons running in the compute instances, which need some special setup. We provide a "compute image builder script":https://github.com/arvados/arvados/tree/master/tools/compute-images that you can use to build a template image following "these instructions":https://doc.arvados.org/main/install/crunch2-cloud/install-compute-node.html . Once you have that image created, you can use the image reference in the Arvados configuration in the next steps. +In a multi-host installation, containers are dispatched in docker daemons running in the compute instances, which need some special setup. We provide a "compute image builder script":https://github.com/arvados/arvados/tree/master/tools/compute-images that you can use to build a template image following "these instructions":https://doc.arvados.org/main/install/crunch2-cloud/install-compute-node.html . Once you have that image created, you can use the image ID in the Arvados configuration in the next steps. 
h2(#choose_configuration). Choose the desired configuration @@ -92,7 +110,7 @@ cp -r config_examples/multi_host/aws local_config_dir Edit the variables in the local.params file. Pay attention to the *_INT_IP, *_TOKEN and *KEY variables. Those variables will be used to do a search and replace on the pillars/* in place of any matching __VARIABLE__. -The multi_host include LetsEncrypt salt code to automatically request and install the certificates for the public-facing hosts (API, Workbench) so it will need the hostnames to be reachable from the Internet. If this cluster will not be the case, please set the variable USE_LETSENCRYPT=no. +The multi_host include LetsEncrypt salt code to automatically request and install the certificates for the public-facing hosts (API/controller, Workbench, Keepproxy/Keepweb) using AWS' Route53. If you will provide custom certificates, please set the variable USE_LETSENCRYPT=no. h3(#further_customization). Further customization of the installation (modifying the salt pillars and states) diff --git a/doc/install/salt.html.textile.liquid b/doc/install/salt.html.textile.liquid index a9ee08fb88..d110f73000 100644 --- a/doc/install/salt.html.textile.liquid +++ b/doc/install/salt.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: installguide -title: Salt prerequisites +title: Planning and prerequisites ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. 
@@ -10,35 +10,107 @@ SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} # "Introduction":#introduction -# "Install Saltstack":#saltstack -# "Choose an Arvados installation configuration":#installconfiguration +# "Provisioning Arvados with Saltstack":#provisioning_arvados +# "The provisioning tool files and directories":#provisioning_tool_files and directories +# "Choose an Arvados installation configuration":#choose_configuration +## "Further customization of the installation (modifying the salt pillars and states)":#further_customization +# "Dump the configuration files created with the provision script":#dump_provision_config +# "Add the Arvados formula to your Saltstack infrastructure":#add_formula_to_saltstack h2(#introduction). Introduction -To ease the installation of the various Arvados components, we have developed a "Saltstack":https://www.saltstack.com/ 's "arvados-formula":https://github.com/arvados/arvados-formula.git which can help you get an Arvados cluster up and running. +To ease the installation of the various Arvados components, we have developed a "Saltstack":https://www.saltstack.com/ 's "arvados-formula":https://git.arvados.org/arvados-formula.git which can help you get an Arvados cluster up and running. -Saltstack is a Python-based, open-source software for event-driven IT automation, remote task execution, and configuration management. It can be used in a master/minion setup or master-less. +Saltstack is a Python-based, open-source software for event-driven IT automation, remote task execution, and configuration management. It can be used in a _master/minion_ setup (where a master node orchestrates and coordinates the configuration of nodes in an infrastructure) or master-less, where Saltstack is run locally in a node, with no communication with a master node. -This is a package-based installation method. 
The Salt scripts to install and configure Arvados using this formula are available at the "tools/salt-install":https://github.com/arvados/arvados/tree/master/tools/salt-install directory in the Arvados git repository. +Similar to other configuration management tools like Puppet, Ansible or Chef, Saltstack uses files named states to describe the tasks that will be performed on a node to take it to a desired state, and pillars to configure variables passed to the states, adding flexibility to the tool. -h2(#saltstack). Install Saltstack +You don't need to be running a Saltstack infrastructure to install Arvados: we wrote a provisioning script that will take care of setting up Saltstack in the node/s where you want to install Arvados and run a master-less installer. Once Arvados is installed, you can either uninstall Saltstack and its files or you can keep them, to modify/maintain your Arvados installation in the future. -If you already have a Saltstack environment or you plan to use the @provision.sh@ script we provide, you can skip this section. +This is a package-based installation method. -The simplest way to get Salt up and running on a node is to use the bootstrap script they provide: + +h2(#provisioning_arvados). Provisioning Arvados with Saltstack + +The "tools/salt-install":https://git.arvados.org/arvados.git/tree/{{ branchname }}:/tools/salt-install directory in the Arvados git repository contains a script that you can run in the node/s where you want to install Arvados' components (the @provision.sh@ script) and a few configuration examples for different setups, that you can use to customize your installation. + +The @provision.sh@ script will help you deploy Arvados by preparing your environment to be able to run the installer, then running it. The actual installer is located at "arvados-formula":https://git.arvados.org/arvados-formula.git/tree/refs/heads/{{ branchname }} and will be cloned during the running of the @provision.sh@ script. 
The installer is built using "Saltstack":https://saltproject.io/ and @provision.sh@ performs the install using master-less mode. + +After setting up a few variables in a config file and copying a directory from the examples (see below), you'll be ready to run it and get Arvados deployed. + + + +h2(#provisioning_tool_files and directories). The provisioning tool files and directories + +The "tools/salt-install":https://git.arvados.org/arvados.git/tree/{{ branchname }}:/tools/salt-install directory contains the following elements: + +* The @provision.sh@ script itself. You don't need to modify it. +* A few @local.params.*@ example files. You will need to copy one of these files to a file named @local.params@, which is the main configuration file for the @provision.sh@ script. +* A few @config_examples/*@ directories, with pillars and states templates. You need to copy one of these to a @local_config_dir@ directory, which will be used by the @provision.sh@ script to setup your nodes. +* A @tests@ directory, with a simple workflow and arvados CLI commands you can run to tests your cluster is capable of running a CWL workflow, upload files and create a user. + +Once you decide on an Arvados architecture you want to apply, you need to copy one of the example configuration files and directory, and edit them to suit your needs. + +Ie., for a multi-hosts / multi-hostnames in AWS, you need to do this: -
curl -L https://bootstrap.saltstack.com -o /tmp/bootstrap_salt.sh
-sudo sh /tmp/bootstrap_salt.sh -XUdfP -x python3
+
cp local.params.example.multiple_hosts local.params
+cp -r config_examples/multi_host/aws local_config_dir
 
-For more information check "Saltstack's documentation":https://docs.saltstack.com/en/latest/topics/installation/index.html +These local files will be preserved if you upgrade the repository. + + + +h2(#choose_configuration). Choose an Arvados installation configuration + +The configuration examples provided with this installer are suitable to install Arvados with the following distribution of hosts/roles: + +* All roles on a single host, which can be done in two fashions: +** Using a single hostname, assigning a different port (other than 443) for each user-facing service: This choice is easier to setup, but the user will need to know the port/s for the different services she wants to connect to. See "Single host install using the provision.sh script":salt-single-host.html for more details. +** Using multiple hostnames on the same IP: this setup involves a few extra steps but each service will have a meaningful hostname so it will make easier to access them later. See "Single host install using the provision.sh script":salt-single-host.html for more details. +* Roles distributed over multiple AWS instances, using multiple hostnames. This example can be adapted to use on-prem too. See "Multiple hosts installation":salt-multi-host.html for more details. + +Once you decide which of these choices you prefer, copy one of the example configuration files and directory, and edit them to suit your needs. + +Ie, if you decide to install Arvados on a single host using multiple hostnames: + +
cp local.params.example.single_host_multiple_hostnames local.params
+cp -r config_examples/single_host/multiple_hostnames local_config_dir
+
+
+ +Edit the variables in the local.params file. + + + +h3(#further_customization). Further customization of the installation (modifying the salt pillars and states) + +If you want or need further customization, you can edit the Saltstack pillars and states files. Pay particular attention to the pillars/arvados.sls one. Any extra state file you add under local_config_dir/states will be added to the salt run and applied to the host. + + + +h2(#dump_provision_config). Dump the configuration files created with the provision script + +As mentioned above, the @provision.sh@ script helps you create a set of configuration files to be used by the Saltstack @arvados-formula@ and other helper formulas. + +Is it possible you want to inspect these files before deploying them or use them within your existing Saltstack environment. In order to get a rendered version of these files, the @provision.sh@ script has a option, @--dump-config@, which takes a directory as mandatory parameter. When this option it used, the script will create the specified directory and write the pillars, states and tests files so you can inspect them. + +Ie. + +
./provision.sh --dump-config ./config_dump --role workbench
+
+
+ +will dump the configuration files used to install a workbench node under the @config_dump@ directory. + +These files are also suitable to be used in your existing Saltstack environment (see below). + + -h2(#installconfiguration). Choose an Arvados installation configuration +h2.(#add_formula_to_saltstack). Add the Arvados formula to your Saltstack infrastructure -The salt formula can be used in a few different ways. Choose one of these three options to install Arvados: +If you already have a Saltstack environment you can add the arvados-formula to your Saltstack master and apply the corresponding states and pillars to the nodes on your infrastructure that will be used to run Arvados. -* "Arvados on a single host":salt-single-host.html -* "Arvados across multiple hosts":salt-multi-host.html -* "Use Vagrant to install Arvados in a virtual machine":salt-vagrant.html +The @--dump-config@ option described above writes a @pillars/top.sls@ and @salt/top.sls@ files that you can use as a guide to configure your infrastructure. diff --git a/doc/user/topics/storage-classes.html.textile.liquid b/doc/user/topics/storage-classes.html.textile.liquid index 96c8083062..99556af10a 100644 --- a/doc/user/topics/storage-classes.html.textile.liquid +++ b/doc/user/topics/storage-classes.html.textile.liquid @@ -16,10 +16,18 @@ Names of storage classes are internal to the cluster and decided by the administ h3. arv-put -You may specify the desired storage class for a collection uploaded using @arv-put@: +You may specify one or more desired storage classes for a collection uploaded using @arv-put@:
-$ arv-put --storage-classes=hot myfile.txt
+$ arv-put --storage-classes=hot,archival myfile.txt
+
+ +h3. arv-mount + +You can ask @arv-mount@ to use specific storage classes when creating new collections: + +
+$ arv-mount --storage-classes=transient --mount-tmp=scratch keep
 
h3. arvados-cwl-runner @@ -46,8 +54,6 @@ h3. Storage class notes Collection blocks will be in the "default" storage class if not otherwise specified. -Currently, a collection may only have one desired storage class. - Any user with write access to a collection may set any storage class on that collection. Names of storage classes are internal to the cluster and decided by the administrator. Aside from "default", Arvados currently does not define any standard storage class names. diff --git a/go.mod b/go.mod index 0ff679a576..b70f6f3b47 100644 --- a/go.mod +++ b/go.mod @@ -55,12 +55,14 @@ require ( github.com/prometheus/common v0.7.0 github.com/satori/go.uuid v1.2.1-0.20180103174451-36e9d2ebbde5 // indirect github.com/sergi/go-diff v1.0.0 // indirect - github.com/sirupsen/logrus v1.4.2 + github.com/sirupsen/logrus v1.8.1 github.com/src-d/gcfg v1.3.0 // indirect github.com/xanzy/ssh-agent v0.1.0 // indirect golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 + golang.org/x/sys v0.0.0-20210603125802-9665404d3644 + golang.org/x/tools v0.1.0 // indirect golang.org/x/sys v0.0.0-20210510120138-977fb7262007 golang.org/x/tools v0.1.2 // indirect google.golang.org/api v0.13.0 diff --git a/go.sum b/go.sum index c28ab46240..1fd37ed11c 100644 --- a/go.sum +++ b/go.sum @@ -234,6 +234,8 @@ github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63/go.mod h1:n+ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/spf13/afero v1.2.1 h1:qgMbHoJbPbw579P+1zVY+6n4nIFuIchaIjzZ/I/Yq8M= 
github.com/spf13/afero v1.2.1/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/src-d/gcfg v1.3.0 h1:2BEDr8r0I0b8h/fOqwtxCEiq2HJu8n2JGZJQFGXWLjg= @@ -309,10 +311,13 @@ golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4 h1:myAQVi0cGEoqQVR5POX+8RR2mrocKqNN1hmeMqhX27k= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210603125802-9665404d3644 h1:CA1DEQ4NdKphKeL70tvsWNdT5oFh1lOjihRcEDROi0I= +golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go index 071c95006c..269a7d8def 100644 --- a/lib/cloud/ec2/ec2.go +++ b/lib/cloud/ec2/ec2.go @@ -20,6 +20,7 @@ import ( "git.arvados.org/arvados.git/lib/cloud" "git.arvados.org/arvados.git/sdk/go/arvados" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" 
"github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" "github.com/aws/aws-sdk-go/aws/ec2metadata" @@ -349,6 +350,31 @@ func (err rateLimitError) EarliestRetry() time.Time { return err.earliestRetry } +var isCodeCapacity = map[string]bool{ + "InsufficientInstanceCapacity": true, + "VcpuLimitExceeded": true, + "MaxSpotInstanceCountExceeded": true, +} + +// isErrorCapacity returns whether the error is to be throttled based on its code. +// Returns false if error is nil. +func isErrorCapacity(err error) bool { + if aerr, ok := err.(awserr.Error); ok && aerr != nil { + if _, ok := isCodeCapacity[aerr.Code()]; ok { + return true + } + } + return false +} + +type ec2QuotaError struct { + error +} + +func (er *ec2QuotaError) IsQuotaError() bool { + return true +} + func wrapError(err error, throttleValue *atomic.Value) error { if request.IsErrorThrottle(err) { // Back off exponentially until an upstream call @@ -362,6 +388,8 @@ func wrapError(err error, throttleValue *atomic.Value) error { } throttleValue.Store(d) return rateLimitError{error: err, earliestRetry: time.Now().Add(d)} + } else if isErrorCapacity(err) { + return &ec2QuotaError{err} } else if err != nil { throttleValue.Store(time.Duration(0)) return err diff --git a/lib/cloud/ec2/ec2_test.go b/lib/cloud/ec2/ec2_test.go index e7319a0cb6..3cd238ded5 100644 --- a/lib/cloud/ec2/ec2_test.go +++ b/lib/cloud/ec2/ec2_test.go @@ -25,6 +25,7 @@ package ec2 import ( "encoding/json" "flag" + "sync/atomic" "testing" "git.arvados.org/arvados.git/lib/cloud" @@ -32,6 +33,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/config" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" "github.com/sirupsen/logrus" check "gopkg.in/check.v1" @@ -246,4 +248,14 @@ func (*EC2InstanceSetSuite) TestDestroyInstances(c *check.C) { } } -var TestRateLimitErrorInterface cloud.RateLimitError = 
rateLimitError{} +func (*EC2InstanceSetSuite) TestWrapError(c *check.C) { + retryError := awserr.New("Throttling", "", nil) + wrapped := wrapError(retryError, &atomic.Value{}) + _, ok := wrapped.(cloud.RateLimitError) + c.Check(ok, check.Equals, true) + + quotaError := awserr.New("InsufficientInstanceCapacity", "", nil) + wrapped = wrapError(quotaError, nil) + _, ok = wrapped.(cloud.QuotaError) + c.Check(ok, check.Equals, true) +} diff --git a/lib/cmd/cmd.go b/lib/cmd/cmd.go index b7d918739b..63d7576b4c 100644 --- a/lib/cmd/cmd.go +++ b/lib/cmd/cmd.go @@ -16,6 +16,8 @@ import ( "runtime" "sort" "strings" + + "github.com/sirupsen/logrus" ) type Handler interface { @@ -153,3 +155,9 @@ func SubcommandToFront(args []string, flagset FlagSet) []string { copy(newargs[flagargs+1:], args[flagargs+1:]) return newargs } + +type NoPrefixFormatter struct{} + +func (NoPrefixFormatter) Format(entry *logrus.Entry) ([]byte, error) { + return []byte(entry.Message + "\n"), nil +} diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index e2ef9899e5..e28d5cbb7f 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -24,49 +24,39 @@ Clusters: # In each of the service sections below, the keys under # InternalURLs are the endpoints where the service should be - # listening, and reachable from other hosts in the cluster. - SAMPLE: - InternalURLs: - "http://host1.example:12345": {} - "http://host2.example:12345": - # Rendezvous is normally empty/omitted. When changing the - # URL of a Keepstore service, Rendezvous should be set to - # the old URL (with trailing slash omitted) to preserve - # rendezvous ordering. - Rendezvous: "" - SAMPLE: - Rendezvous: "" - ExternalURL: "-" + # listening, and reachable from other hosts in the + # cluster. 
Example: + # + # InternalURLs: + # "http://host1.example:12345": {} + # "http://host2.example:12345": {} RailsAPI: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" Controller: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Websocket: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Keepbalance: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" GitHTTP: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" GitSSH: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" DispatchCloud: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" - SSO: - InternalURLs: {} - ExternalURL: "" Keepproxy: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" WebDAV: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} # Base URL for Workbench inline preview. If blank, use # WebDAVDownload instead, and disable inline preview. # If both are empty, downloading collections from workbench @@ -105,7 +95,7 @@ Clusters: ExternalURL: "" WebDAVDownload: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} # Base URL for download links. If blank, serve links to WebDAV # with disposition=attachment query param. Unlike preview links, # browsers do not render attachments, so there is no risk of XSS. @@ -119,13 +109,19 @@ Clusters: ExternalURL: "" Keepstore: - InternalURLs: {} + InternalURLs: + SAMPLE: + # Rendezvous is normally empty/omitted. When changing the + # URL of a Keepstore service, Rendezvous should be set to + # the old URL (with trailing slash omitted) to preserve + # rendezvous ordering. + Rendezvous: "" ExternalURL: "-" Composer: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" WebShell: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} # ShellInABox service endpoint URL for a given VM. If empty, do not # offer web shell logins. 
# @@ -136,13 +132,13 @@ Clusters: # https://*.webshell.uuid_prefix.arvadosapi.com ExternalURL: "" Workbench1: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Workbench2: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Health: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" PostgreSQL: @@ -555,8 +551,36 @@ Clusters: # Persistent sessions. MaxSessions: 100 + # Selectively set permissions for regular users and admins to + # download or upload data files using the upload/download + # features for Workbench, WebDAV and S3 API support. + WebDAVPermission: + User: + Download: true + Upload: true + Admin: + Download: true + Upload: true + + # Selectively set permissions for regular users and admins to be + # able to download or upload blocks using arv-put and + # arv-get from outside the cluster. + KeepproxyPermission: + User: + Download: true + Upload: true + Admin: + Download: true + Upload: true + + # Post upload / download events to the API server logs table, so + # that they can be included in the arv-user-activity report. + # You can disable this if you find that it is creating excess + # load on the API server and you don't need it. + WebDAVLogEvents: true + Login: - # One of the following mechanisms (SSO, Google, PAM, LDAP, or + # One of the following mechanisms (Google, PAM, LDAP, or # LoginCluster) should be enabled; see # https://doc.arvados.org/install/setup-login.html @@ -737,16 +761,6 @@ Clusters: # originally supplied by the user will be used. UsernameAttribute: uid - SSO: - # Authenticate with a separate SSO server. (Deprecated) - Enable: false - - # ProviderAppID and ProviderAppSecret are generated during SSO - # setup; see - # https://doc.arvados.org/v2.0/install/install-sso.html#update-config - ProviderAppID: "" - ProviderAppSecret: "" - Test: # Authenticate users listed here in the config file. 
This # feature is intended to be used in test environments, and diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go index 5e68bbfcef..efc9f0837e 100644 --- a/lib/config/deprecated.go +++ b/lib/config/deprecated.go @@ -103,18 +103,6 @@ func (ldr *Loader) applyDeprecatedConfig(cfg *arvados.Config) error { *dst = *n } - // Provider* moved to SSO.Provider* - if dst, n := &cluster.Login.SSO.ProviderAppID, dcluster.Login.ProviderAppID; n != nil && *n != *dst { - *dst = *n - if *n != "" { - // In old config, non-empty ID meant enable - cluster.Login.SSO.Enable = true - } - } - if dst, n := &cluster.Login.SSO.ProviderAppSecret, dcluster.Login.ProviderAppSecret; n != nil && *n != *dst { - *dst = *n - } - cfg.Clusters[id] = cluster } return nil diff --git a/lib/config/export.go b/lib/config/export.go index 23d0b6bffe..8753b52f27 100644 --- a/lib/config/export.go +++ b/lib/config/export.go @@ -106,6 +106,9 @@ var whitelist = map[string]bool{ "Collections.TrashSweepInterval": false, "Collections.TrustAllContent": false, "Collections.WebDAVCache": false, + "Collections.KeepproxyPermission": false, + "Collections.WebDAVPermission": false, + "Collections.WebDAVLogEvents": false, "Containers": true, "Containers.CloudVMs": false, "Containers.CrunchRunArgumentsList": false, @@ -173,10 +176,6 @@ var whitelist = map[string]bool{ "Login.PAM.Enable": true, "Login.PAM.Service": false, "Login.RemoteTokenRefresh": true, - "Login.SSO": true, - "Login.SSO.Enable": true, - "Login.SSO.ProviderAppID": false, - "Login.SSO.ProviderAppSecret": false, "Login.Test": true, "Login.Test.Enable": true, "Login.Test.Users": false, diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index fbee937b39..b15bf7eebc 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -30,49 +30,39 @@ Clusters: # In each of the service sections below, the keys under # InternalURLs are the endpoints where the service should be - # listening, and reachable from 
other hosts in the cluster. - SAMPLE: - InternalURLs: - "http://host1.example:12345": {} - "http://host2.example:12345": - # Rendezvous is normally empty/omitted. When changing the - # URL of a Keepstore service, Rendezvous should be set to - # the old URL (with trailing slash omitted) to preserve - # rendezvous ordering. - Rendezvous: "" - SAMPLE: - Rendezvous: "" - ExternalURL: "-" + # listening, and reachable from other hosts in the + # cluster. Example: + # + # InternalURLs: + # "http://host1.example:12345": {} + # "http://host2.example:12345": {} RailsAPI: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" Controller: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Websocket: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Keepbalance: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" GitHTTP: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" GitSSH: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" DispatchCloud: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" - SSO: - InternalURLs: {} - ExternalURL: "" Keepproxy: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" WebDAV: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} # Base URL for Workbench inline preview. If blank, use # WebDAVDownload instead, and disable inline preview. # If both are empty, downloading collections from workbench @@ -111,7 +101,7 @@ Clusters: ExternalURL: "" WebDAVDownload: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} # Base URL for download links. If blank, serve links to WebDAV # with disposition=attachment query param. Unlike preview links, # browsers do not render attachments, so there is no risk of XSS. @@ -125,13 +115,19 @@ Clusters: ExternalURL: "" Keepstore: - InternalURLs: {} + InternalURLs: + SAMPLE: + # Rendezvous is normally empty/omitted. 
When changing the + # URL of a Keepstore service, Rendezvous should be set to + # the old URL (with trailing slash omitted) to preserve + # rendezvous ordering. + Rendezvous: "" ExternalURL: "-" Composer: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" WebShell: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} # ShellInABox service endpoint URL for a given VM. If empty, do not # offer web shell logins. # @@ -142,13 +138,13 @@ Clusters: # https://*.webshell.uuid_prefix.arvadosapi.com ExternalURL: "" Workbench1: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Workbench2: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "" Health: - InternalURLs: {} + InternalURLs: {SAMPLE: {}} ExternalURL: "-" PostgreSQL: @@ -561,8 +557,36 @@ Clusters: # Persistent sessions. MaxSessions: 100 + # Selectively set permissions for regular users and admins to + # download or upload data files using the upload/download + # features for Workbench, WebDAV and S3 API support. + WebDAVPermission: + User: + Download: true + Upload: true + Admin: + Download: true + Upload: true + + # Selectively set permissions for regular users and admins to be + # able to download or upload blocks using arv-put and + # arv-get from outside the cluster. + KeepproxyPermission: + User: + Download: true + Upload: true + Admin: + Download: true + Upload: true + + # Post upload / download events to the API server logs table, so + # that they can be included in the arv-user-activity report. + # You can disable this if you find that it is creating excess + # load on the API server and you don't need it. + WebDAVLogEvents: true + Login: - # One of the following mechanisms (SSO, Google, PAM, LDAP, or + # One of the following mechanisms (Google, PAM, LDAP, or # LoginCluster) should be enabled; see # https://doc.arvados.org/install/setup-login.html @@ -743,16 +767,6 @@ Clusters: # originally supplied by the user will be used. 
UsernameAttribute: uid - SSO: - # Authenticate with a separate SSO server. (Deprecated) - Enable: false - - # ProviderAppID and ProviderAppSecret are generated during SSO - # setup; see - # https://doc.arvados.org/v2.0/install/install-sso.html#update-config - ProviderAppID: "" - ProviderAppSecret: "" - Test: # Authenticate users listed here in the config file. This # feature is intended to be used in test environments, and diff --git a/lib/config/load.go b/lib/config/load.go index cc26cdaecc..169b252a0e 100644 --- a/lib/config/load.go +++ b/lib/config/load.go @@ -182,6 +182,11 @@ func (ldr *Loader) Load() (*arvados.Config, error) { ldr.configdata = buf } + // FIXME: We should reject YAML if the same key is used twice + // in a map/object, like {foo: bar, foo: baz}. Maybe we'll get + // this fixed free when we upgrade ghodss/yaml to a version + // that uses go-yaml v3. + // Load the config into a dummy map to get the cluster ID // keys, discarding the values; then set up defaults for each // cluster ID; then load the real config on top of the @@ -291,6 +296,8 @@ func (ldr *Loader) Load() (*arvados.Config, error) { checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection), ldr.checkEmptyKeepstores(cc), ldr.checkUnlistedKeepstores(cc), + // TODO: check non-empty Rendezvous on + // services other than Keepstore } { if err != nil { return nil, err @@ -354,20 +361,33 @@ func (ldr *Loader) logExtraKeys(expected, supplied map[string]interface{}, prefi if ldr.Logger == nil { return } - allowed := map[string]interface{}{} - for k, v := range expected { - allowed[strings.ToLower(k)] = v - } for k, vsupp := range supplied { if k == "SAMPLE" { // entry will be dropped in removeSampleKeys anyway continue } - vexp, ok := allowed[strings.ToLower(k)] + vexp, ok := expected[k] if expected["SAMPLE"] != nil { + // use the SAMPLE entry's keys as the + // "expected" map when checking vsupp + // recursively. 
vexp = expected["SAMPLE"] } else if !ok { - ldr.Logger.Warnf("deprecated or unknown config entry: %s%s", prefix, k) + // check for a case-insensitive match + hint := "" + for ek := range expected { + if strings.EqualFold(k, ek) { + hint = " (perhaps you meant " + ek + "?)" + // If we don't delete this, it + // will end up getting merged, + // unpredictably + // merging/overriding the + // default. + delete(supplied, k) + break + } + } + ldr.Logger.Warnf("deprecated or unknown config entry: %s%s%s", prefix, k, hint) continue } if vsupp, ok := vsupp.(map[string]interface{}); !ok { diff --git a/lib/config/load_test.go b/lib/config/load_test.go index 6c11ee7803..396faca484 100644 --- a/lib/config/load_test.go +++ b/lib/config/load_test.go @@ -196,21 +196,37 @@ Clusters: SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa Collections: BlobSigningKey: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - postgresql: {} - BadKey: {} - Containers: {} + PostgreSQL: {} + BadKey1: {} + Containers: + RunTimeEngine: abc RemoteClusters: z2222: Host: z2222.arvadosapi.com Proxy: true - BadKey: badValue + BadKey2: badValue + Services: + KeepStore: + InternalURLs: + "http://host.example:12345": {} + Keepstore: + InternalURLs: + "http://host.example:12345": + RendezVous: x + ServiceS: + Keepstore: + InternalURLs: + "http://host.example:12345": {} + Volumes: + zzzzz-nyw5e-aaaaaaaaaaaaaaa: {} `, &logbuf).Load() c.Assert(err, check.IsNil) + c.Log(logbuf.String()) logs := strings.Split(strings.TrimSuffix(logbuf.String(), "\n"), "\n") for _, log := range logs { - c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*BadKey.*`) + c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*(RunTimeEngine.*RuntimeEngine|BadKey1|BadKey2|KeepStore|ServiceS|RendezVous).*`) } - c.Check(logs, check.HasLen, 2) + c.Check(logs, check.HasLen, 6) } func (s *LoadSuite) checkSAMPLEKeys(c *check.C, path string, x interface{}) { @@ -322,8 +338,8 @@ func (s *LoadSuite) TestPostgreSQLKeyConflict(c 
*check.C) { _, err := testLoader(c, ` Clusters: zzzzz: - postgresql: - connection: + PostgreSQL: + Connection: DBName: dbname Host: host `, nil).Load() diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go index 2911a4f031..9b71c349a4 100644 --- a/lib/controller/handler_test.go +++ b/lib/controller/handler_test.go @@ -164,34 +164,6 @@ func (s *HandlerSuite) TestProxyNotFound(c *check.C) { c.Check(jresp["errors"], check.FitsTypeOf, []interface{}{}) } -func (s *HandlerSuite) TestProxyRedirect(c *check.C) { - s.cluster.Login.SSO.Enable = true - s.cluster.Login.SSO.ProviderAppID = "test" - s.cluster.Login.SSO.ProviderAppSecret = "test" - req := httptest.NewRequest("GET", "https://0.0.0.0:1/login?return_to=foo", nil) - resp := httptest.NewRecorder() - s.handler.ServeHTTP(resp, req) - if !c.Check(resp.Code, check.Equals, http.StatusFound) { - c.Log(resp.Body.String()) - } - // Old "proxy entire request" code path returns an absolute - // URL. New lib/controller/federation code path returns a - // relative URL. 
- c.Check(resp.Header().Get("Location"), check.Matches, `(https://0.0.0.0:1)?/auth/joshid\?return_to=%2Cfoo&?`) -} - -func (s *HandlerSuite) TestLogoutSSO(c *check.C) { - s.cluster.Login.SSO.Enable = true - s.cluster.Login.SSO.ProviderAppID = "test" - req := httptest.NewRequest("GET", "https://0.0.0.0:1/logout?return_to=https://example.com/foo", nil) - resp := httptest.NewRecorder() - s.handler.ServeHTTP(resp, req) - if !c.Check(resp.Code, check.Equals, http.StatusFound) { - c.Log(resp.Body.String()) - } - c.Check(resp.Header().Get("Location"), check.Equals, "http://localhost:3002/users/sign_out?"+url.Values{"redirect_uri": {"https://example.com/foo"}}.Encode()) -} - func (s *HandlerSuite) TestLogoutGoogle(c *check.C) { s.cluster.Login.Google.Enable = true s.cluster.Login.Google.ClientID = "test" diff --git a/lib/controller/localdb/login.go b/lib/controller/localdb/login.go index 0d6f2ef027..3c7b01baad 100644 --- a/lib/controller/localdb/login.go +++ b/lib/controller/localdb/login.go @@ -30,15 +30,14 @@ type loginController interface { func chooseLoginController(cluster *arvados.Cluster, parent *Conn) loginController { wantGoogle := cluster.Login.Google.Enable wantOpenIDConnect := cluster.Login.OpenIDConnect.Enable - wantSSO := cluster.Login.SSO.Enable wantPAM := cluster.Login.PAM.Enable wantLDAP := cluster.Login.LDAP.Enable wantTest := cluster.Login.Test.Enable wantLoginCluster := cluster.Login.LoginCluster != "" && cluster.Login.LoginCluster != cluster.ClusterID switch { - case 1 != countTrue(wantGoogle, wantOpenIDConnect, wantSSO, wantPAM, wantLDAP, wantTest, wantLoginCluster): + case 1 != countTrue(wantGoogle, wantOpenIDConnect, wantPAM, wantLDAP, wantTest, wantLoginCluster): return errorLoginController{ - error: errors.New("configuration problem: exactly one of Login.Google, Login.OpenIDConnect, Login.SSO, Login.PAM, Login.LDAP, Login.Test, or Login.LoginCluster must be set"), + error: errors.New("configuration problem: exactly one of Login.Google, 
Login.OpenIDConnect, Login.PAM, Login.LDAP, Login.Test, or Login.LoginCluster must be set"), } case wantGoogle: return &oidcLoginController{ @@ -66,8 +65,6 @@ func chooseLoginController(cluster *arvados.Cluster, parent *Conn) loginControll AcceptAccessToken: cluster.Login.OpenIDConnect.AcceptAccessToken, AcceptAccessTokenScope: cluster.Login.OpenIDConnect.AcceptAccessTokenScope, } - case wantSSO: - return &ssoLoginController{Parent: parent} case wantPAM: return &pamLoginController{Cluster: cluster, Parent: parent} case wantLDAP: @@ -93,20 +90,6 @@ func countTrue(vals ...bool) int { return n } -// Login and Logout are passed through to the parent's railsProxy; -// UserAuthenticate is rejected. -type ssoLoginController struct{ Parent *Conn } - -func (ctrl *ssoLoginController) Login(ctx context.Context, opts arvados.LoginOptions) (arvados.LoginResponse, error) { - return ctrl.Parent.railsProxy.Login(ctx, opts) -} -func (ctrl *ssoLoginController) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) { - return ctrl.Parent.railsProxy.Logout(ctx, opts) -} -func (ctrl *ssoLoginController) UserAuthenticate(ctx context.Context, opts arvados.UserAuthenticateOptions) (arvados.APIClientAuthorization, error) { - return arvados.APIClientAuthorization{}, httpserver.ErrorWithStatus(errors.New("username/password authentication is not available"), http.StatusBadRequest) -} - type errorLoginController struct{ error } func (ctrl errorLoginController) Login(context.Context, arvados.LoginOptions) (arvados.LoginResponse, error) { diff --git a/lib/controller/localdb/login_oidc_test.go b/lib/controller/localdb/login_oidc_test.go index c9d6133c48..4be7d58f69 100644 --- a/lib/controller/localdb/login_oidc_test.go +++ b/lib/controller/localdb/login_oidc_test.go @@ -63,7 +63,7 @@ func (s *OIDCLoginSuite) SetUpTest(c *check.C) { c.Assert(err, check.IsNil) s.cluster, err = cfg.GetCluster("") c.Assert(err, check.IsNil) - s.cluster.Login.SSO.Enable = false + 
s.cluster.Login.Test.Enable = false s.cluster.Login.Google.Enable = true s.cluster.Login.Google.ClientID = "test%client$id" s.cluster.Login.Google.ClientSecret = "test#client/secret" diff --git a/lib/controller/rpc/conn_test.go b/lib/controller/rpc/conn_test.go index cf4dbc4767..eee8db9ac8 100644 --- a/lib/controller/rpc/conn_test.go +++ b/lib/controller/rpc/conn_test.go @@ -50,9 +50,8 @@ func (s *RPCSuite) TestLogin(c *check.C) { opts := arvados.LoginOptions{ ReturnTo: "https://foo.example.com/bar", } - resp, err := s.conn.Login(s.ctx, opts) - c.Check(err, check.IsNil) - c.Check(resp.RedirectLocation, check.Equals, "/auth/joshid?return_to="+url.QueryEscape(","+opts.ReturnTo)) + _, err := s.conn.Login(s.ctx, opts) + c.Check(err.(*arvados.TransactionError).StatusCode, check.Equals, 404) } func (s *RPCSuite) TestLogout(c *check.C) { @@ -62,7 +61,7 @@ func (s *RPCSuite) TestLogout(c *check.C) { } resp, err := s.conn.Logout(s.ctx, opts) c.Check(err, check.IsNil) - c.Check(resp.RedirectLocation, check.Equals, "http://localhost:3002/users/sign_out?redirect_uri="+url.QueryEscape(opts.ReturnTo)) + c.Check(resp.RedirectLocation, check.Equals, opts.ReturnTo) } func (s *RPCSuite) TestCollectionCreate(c *check.C) { diff --git a/lib/costanalyzer/cmd.go b/lib/costanalyzer/cmd.go index 525ec619b5..6065ad2c0b 100644 --- a/lib/costanalyzer/cmd.go +++ b/lib/costanalyzer/cmd.go @@ -8,8 +8,8 @@ import ( "io" "time" + "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/sdk/go/ctxlog" - "github.com/sirupsen/logrus" ) var Command = command{} @@ -22,24 +22,17 @@ type command struct { end time.Time } -type NoPrefixFormatter struct{} - -func (f *NoPrefixFormatter) Format(entry *logrus.Entry) ([]byte, error) { - return []byte(entry.Message), nil -} - // RunCommand implements the subcommand "costanalyzer ..." 
func (c command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { var err error logger := ctxlog.New(stderr, "text", "info") + logger.SetFormatter(cmd.NoPrefixFormatter{}) defer func() { if err != nil { - logger.Error("\n" + err.Error() + "\n") + logger.Error("\n" + err.Error()) } }() - logger.SetFormatter(new(NoPrefixFormatter)) - exitcode, err := c.costAnalyzer(prog, args, logger, stdout, stderr) return exitcode diff --git a/lib/costanalyzer/costanalyzer.go b/lib/costanalyzer/costanalyzer.go index edaaa5bd17..dfe3d584ce 100644 --- a/lib/costanalyzer/costanalyzer.go +++ b/lib/costanalyzer/costanalyzer.go @@ -9,9 +9,6 @@ import ( "errors" "flag" "fmt" - "git.arvados.org/arvados.git/sdk/go/arvados" - "git.arvados.org/arvados.git/sdk/go/arvadosclient" - "git.arvados.org/arvados.git/sdk/go/keepclient" "io" "io/ioutil" "net/http" @@ -20,6 +17,9 @@ import ( "strings" "time" + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" + "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/sirupsen/logrus" ) @@ -169,7 +169,7 @@ Options: } logger.SetLevel(lvl) if !c.cache { - logger.Debug("Caching disabled\n") + logger.Debug("Caching disabled") } return } @@ -249,12 +249,12 @@ func loadCachedObject(logger *logrus.Logger, file string, uuid string, object in case *arvados.ContainerRequest: if v.State == arvados.ContainerRequestStateFinal { reload = false - logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file) + logger.Debugf("Loaded object %s from local cache (%s)", uuid, file) } case *arvados.Container: if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled { reload = false - logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file) + logger.Debugf("Loaded object %s from local cache (%s)", uuid, file) } } return @@ -384,7 +384,7 @@ func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado return nil, 
fmt.Errorf("error querying container_requests: %s", err.Error()) } if value, ok := childCrs["items"]; ok { - logger.Infof("Collecting top level container requests in project %s\n", uuid) + logger.Infof("Collecting top level container requests in project %s", uuid) items := value.([]interface{}) for _, item := range items { itemMap := item.(map[string]interface{}) @@ -397,7 +397,7 @@ func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado } } } else { - logger.Infof("No top level container requests found in project %s\n", uuid) + logger.Infof("No top level container requests found in project %s", uuid) } return } @@ -410,7 +410,7 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado var tmpCsv string var tmpTotalCost float64 var totalCost float64 - fmt.Printf("Processing %s\n", uuid) + logger.Debugf("Processing %s", uuid) var crUUID = uuid if strings.Contains(uuid, "-4zz18-") { @@ -438,7 +438,7 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado } if len(cr.ContainerUUID) == 0 { // Nothing to do! E.g. a CR in 'Uncommitted' state. - logger.Infof("No container associated with container request %s, skipping\n", crUUID) + logger.Infof("No container associated with container request %s, skipping", crUUID) return nil, nil } var container arvados.Container @@ -473,14 +473,20 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado return nil, fmt.Errorf("error querying container_requests: %s", err.Error()) } logger.Infof("Collecting child containers for container request %s (%s)", crUUID, container.FinishedAt) - for _, cr2 := range childCrs.Items { - logger.Info(".") + progressTicker := time.NewTicker(5 * time.Second) + defer progressTicker.Stop() + for i, cr2 := range childCrs.Items { + select { + case <-progressTicker.C: + logger.Infof("... 
%d of %d", i+1, len(childCrs.Items)) + default: + } node, err := getNode(arv, ac, kc, cr2) if err != nil { logger.Errorf("Skipping container request %s: error getting node %s: %s", cr2.UUID, cr2.UUID, err) continue } - logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n") + logger.Debug("Child container: " + cr2.ContainerUUID) var c2 arvados.Container err = loadObject(logger, ac, cr.UUID, cr2.ContainerUUID, cache, &c2) if err != nil { @@ -491,7 +497,7 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado csv += tmpCsv totalCost += tmpTotalCost } - logger.Info(" done\n") + logger.Debug("Done collecting child containers") csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n" @@ -502,7 +508,7 @@ func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.Arvado if err != nil { return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error()) } - logger.Infof("\nUUID report in %s\n\n", fName) + logger.Infof("\nUUID report in %s", fName) } return @@ -562,7 +568,7 @@ func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger err := ac.RequestAndDecode(&list, "GET", "arvados/v1/container_requests", nil, params) if err != nil { - logger.Errorf("Error getting container request list from Arvados API: %s\n", err) + logger.Errorf("Error getting container request list from Arvados API: %s", err) break } if len(list.Items) == 0 { @@ -581,7 +587,7 @@ func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger cost := make(map[string]float64) for uuid := range uuidChannel { - fmt.Printf("Considering %s\n", uuid) + logger.Debugf("Considering %s", uuid) if strings.Contains(uuid, "-j7d0g-") { // This is a project (group) cost, err = handleProject(logger, uuid, arv, ac, kc, c.resultsDir, c.cache) @@ -611,14 +617,14 @@ func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger // keep going. 
logger.Errorf("cost analysis is not supported for the 'Home' project: %s", uuid) } else { - logger.Errorf("this argument does not look like a uuid: %s\n", uuid) + logger.Errorf("this argument does not look like a uuid: %s", uuid) exitcode = 3 return } } if len(cost) == 0 { - logger.Info("Nothing to do!\n") + logger.Info("Nothing to do!") return } @@ -646,7 +652,7 @@ func (c *command) costAnalyzer(prog string, args []string, logger *logrus.Logger exitcode = 1 return } - logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile) + logger.Infof("Aggregate cost accounting for all supplied uuids in %s", aFile) } // Output the total dollar amount on stdout diff --git a/lib/deduplicationreport/command.go b/lib/deduplicationreport/command.go index 93cdb61d3b..39bbbc9498 100644 --- a/lib/deduplicationreport/command.go +++ b/lib/deduplicationreport/command.go @@ -7,20 +7,14 @@ package deduplicationreport import ( "io" + "git.arvados.org/arvados.git/lib/cmd" "git.arvados.org/arvados.git/sdk/go/ctxlog" - "github.com/sirupsen/logrus" ) var Command command type command struct{} -type NoPrefixFormatter struct{} - -func (f *NoPrefixFormatter) Format(entry *logrus.Entry) ([]byte, error) { - return []byte(entry.Message + "\n"), nil -} - // RunCommand implements the subcommand "deduplication-report ..." func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { var err error @@ -31,7 +25,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s } }() - logger.SetFormatter(new(NoPrefixFormatter)) + logger.SetFormatter(cmd.NoPrefixFormatter{}) exitcode := report(prog, args, logger, stdout, stderr) diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go new file mode 100644 index 0000000000..b0241b3ae4 --- /dev/null +++ b/lib/diagnostics/cmd.go @@ -0,0 +1,612 @@ +// Copyright (C) The Arvados Authors. All rights reserved. 
+// +// SPDX-License-Identifier: AGPL-3.0 + +package diagnostics + +import ( + "bytes" + "context" + "flag" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "net/url" + "strings" + "time" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/ctxlog" + "github.com/sirupsen/logrus" +) + +type Command struct{} + +func (cmd Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { + var diag diagnoser + f := flag.NewFlagSet(prog, flag.ContinueOnError) + f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "name of project to find/create in home project and use for temporary/test objects") + f.StringVar(&diag.logLevel, "log-level", "info", "logging level (debug, info, warning, error)") + f.BoolVar(&diag.checkInternal, "internal-client", false, "check that this host is considered an \"internal\" client") + f.BoolVar(&diag.checkExternal, "external-client", false, "check that this host is considered an \"external\" client") + f.IntVar(&diag.priority, "priority", 500, "priority for test container (1..1000, or 0 to skip)") + f.DurationVar(&diag.timeout, "timeout", 10*time.Second, "timeout for http requests") + err := f.Parse(args) + if err == flag.ErrHelp { + return 0 + } else if err != nil { + fmt.Fprintln(stderr, err) + return 2 + } + diag.logger = ctxlog.New(stdout, "text", diag.logLevel) + diag.logger.SetFormatter(&logrus.TextFormatter{DisableTimestamp: true, DisableLevelTruncation: true, PadLevelText: true}) + diag.runtests() + if len(diag.errors) == 0 { + diag.logger.Info("--- no errors ---") + return 0 + } else { + if diag.logger.Level > logrus.ErrorLevel { + fmt.Fprint(stdout, "\n--- cut here --- error summary ---\n\n") + for _, e := range diag.errors { + diag.logger.Error(e) + } + } + return 1 + } +} + +type diagnoser struct { + stdout io.Writer + stderr io.Writer + logLevel string + priority int + projectName string + checkInternal bool + checkExternal bool + 
timeout time.Duration + logger *logrus.Logger + errors []string + done map[int]bool +} + +func (diag *diagnoser) debugf(f string, args ...interface{}) { + diag.logger.Debugf(" ... "+f, args...) +} + +func (diag *diagnoser) infof(f string, args ...interface{}) { + diag.logger.Infof(" ... "+f, args...) +} + +func (diag *diagnoser) warnf(f string, args ...interface{}) { + diag.logger.Warnf(" ... "+f, args...) +} + +func (diag *diagnoser) errorf(f string, args ...interface{}) { + diag.logger.Errorf(f, args...) + diag.errors = append(diag.errors, fmt.Sprintf(f, args...)) +} + +// Run the given func, logging appropriate messages before and after, +// adding timing info, etc. +// +// The id argument should be unique among tests, and shouldn't change +// when other tests are added/removed. +func (diag *diagnoser) dotest(id int, title string, fn func() error) { + if diag.done == nil { + diag.done = map[int]bool{} + } else if diag.done[id] { + diag.errorf("(bug) reused test id %d", id) + } + diag.done[id] = true + + diag.logger.Infof("%4d: %s", id, title) + t0 := time.Now() + err := fn() + elapsed := fmt.Sprintf("%d ms", time.Now().Sub(t0)/time.Millisecond) + if err != nil { + diag.errorf("%4d: %s (%s): %s", id, title, elapsed, err) + } else { + diag.logger.Debugf("%4d: %s (%s): ok", id, title, elapsed) + } +} + +func (diag *diagnoser) runtests() { + client := arvados.NewClientFromEnv() + + if client.APIHost == "" || client.AuthToken == "" { + diag.errorf("ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables are not set -- aborting without running any tests") + return + } + + var dd arvados.DiscoveryDocument + ddpath := "discovery/v1/apis/arvados/v1/rest" + diag.dotest(10, fmt.Sprintf("getting discovery document from https://%s/%s", client.APIHost, ddpath), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + err := client.RequestAndDecodeContext(ctx, &dd, "GET", ddpath, nil, nil) + if err != nil 
{ + return err + } + diag.debugf("BlobSignatureTTL = %d", dd.BlobSignatureTTL) + return nil + }) + + var cluster arvados.Cluster + cfgpath := "arvados/v1/config" + diag.dotest(20, fmt.Sprintf("getting exported config from https://%s/%s", client.APIHost, cfgpath), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + err := client.RequestAndDecodeContext(ctx, &cluster, "GET", cfgpath, nil, nil) + if err != nil { + return err + } + diag.debugf("Collections.BlobSigning = %v", cluster.Collections.BlobSigning) + return nil + }) + + var user arvados.User + diag.dotest(30, "getting current user record", func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + err := client.RequestAndDecodeContext(ctx, &user, "GET", "arvados/v1/users/current", nil, nil) + if err != nil { + return err + } + diag.debugf("user uuid = %s", user.UUID) + return nil + }) + + // uncomment to create some spurious errors + // cluster.Services.WebDAVDownload.ExternalURL.Host = "0.0.0.0:9" + + // TODO: detect routing errors here, like finding wb2 at the + // wb1 address. + for i, svc := range []*arvados.Service{ + &cluster.Services.Keepproxy, + &cluster.Services.WebDAV, + &cluster.Services.WebDAVDownload, + &cluster.Services.Websocket, + &cluster.Services.Workbench1, + &cluster.Services.Workbench2, + } { + diag.dotest(40+i, fmt.Sprintf("connecting to service endpoint %s", svc.ExternalURL), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + u := svc.ExternalURL + if strings.HasPrefix(u.Scheme, "ws") { + // We can do a real websocket test elsewhere, + // but for now we'll just check the https + // connection. 
+ u.Scheme = "http" + u.Scheme[2:] + } + if svc == &cluster.Services.WebDAV && strings.HasPrefix(u.Host, "*") { + u.Host = "d41d8cd98f00b204e9800998ecf8427e-0" + u.Host[1:] + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + resp.Body.Close() + return nil + }) + } + + for i, url := range []string{ + cluster.Services.Controller.ExternalURL.String(), + cluster.Services.Keepproxy.ExternalURL.String() + "d41d8cd98f00b204e9800998ecf8427e+0", + cluster.Services.WebDAVDownload.ExternalURL.String(), + } { + diag.dotest(50+i, fmt.Sprintf("checking CORS headers at %s", url), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return err + } + req.Header.Set("Origin", "https://example.com") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + if hdr := resp.Header.Get("Access-Control-Allow-Origin"); hdr != "*" { + return fmt.Errorf("expected \"Access-Control-Allow-Origin: *\", got %q", hdr) + } + return nil + }) + } + + var keeplist arvados.KeepServiceList + diag.dotest(60, "checking internal/external client detection", func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + err := client.RequestAndDecodeContext(ctx, &keeplist, "GET", "arvados/v1/keep_services/accessible", nil, arvados.ListOptions{Limit: 999999}) + if err != nil { + return fmt.Errorf("error getting keep services list: %s", err) + } else if len(keeplist.Items) == 0 { + return fmt.Errorf("controller did not return any keep services") + } + found := map[string]int{} + for _, ks := range keeplist.Items { + found[ks.ServiceType]++ + } + isInternal := found["proxy"] == 0 && len(keeplist.Items) > 0 + isExternal := found["proxy"] > 
0 && found["proxy"] == len(keeplist.Items) + if isExternal { + diag.debugf("controller returned only proxy services, this host is treated as \"external\"") + } else if isInternal { + diag.debugf("controller returned only non-proxy services, this host is treated as \"internal\"") + } + if (diag.checkInternal && !isInternal) || (diag.checkExternal && !isExternal) { + return fmt.Errorf("expecting internal=%v external=%v, but found internal=%v external=%v", diag.checkInternal, diag.checkExternal, isInternal, isExternal) + } + return nil + }) + + for i, ks := range keeplist.Items { + u := url.URL{ + Scheme: "http", + Host: net.JoinHostPort(ks.ServiceHost, fmt.Sprintf("%d", ks.ServicePort)), + Path: "/", + } + if ks.ServiceSSLFlag { + u.Scheme = "https" + } + diag.dotest(61+i, fmt.Sprintf("reading+writing via keep service at %s", u.String()), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + req, err := http.NewRequestWithContext(ctx, "PUT", u.String()+"d41d8cd98f00b204e9800998ecf8427e", nil) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+client.AuthToken) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("reading response body: %s", err) + } + loc := strings.TrimSpace(string(body)) + if !strings.HasPrefix(loc, "d41d8") { + return fmt.Errorf("unexpected response from write: %q", body) + } + + req, err = http.NewRequestWithContext(ctx, "GET", u.String()+loc, nil) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+client.AuthToken) + resp, err = http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("reading response body: %s", err) + } + if len(body) != 0 { + return fmt.Errorf("unexpected response from 
read: %q", body) + } + + return nil + }) + } + + var project arvados.Group + diag.dotest(80, fmt.Sprintf("finding/creating %q project", diag.projectName), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + var grplist arvados.GroupList + err := client.RequestAndDecodeContext(ctx, &grplist, "GET", "arvados/v1/groups", nil, arvados.ListOptions{ + Filters: []arvados.Filter{ + {"name", "=", diag.projectName}, + {"group_class", "=", "project"}, + {"owner_uuid", "=", user.UUID}}, + Limit: 999999}) + if err != nil { + return fmt.Errorf("list groups: %s", err) + } + if len(grplist.Items) > 0 { + project = grplist.Items[0] + diag.debugf("using existing project, uuid = %s", project.UUID) + return nil + } + diag.debugf("list groups: ok, no results") + err = client.RequestAndDecodeContext(ctx, &project, "POST", "arvados/v1/groups", nil, map[string]interface{}{"group": map[string]interface{}{ + "name": diag.projectName, + "group_class": "project", + }}) + if err != nil { + return fmt.Errorf("create project: %s", err) + } + diag.debugf("created project, uuid = %s", project.UUID) + return nil + }) + + var collection arvados.Collection + diag.dotest(90, "creating temporary collection", func() error { + if project.UUID == "" { + return fmt.Errorf("skipping, no project to work in") + } + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + err := client.RequestAndDecodeContext(ctx, &collection, "POST", "arvados/v1/collections", nil, map[string]interface{}{ + "ensure_unique_name": true, + "collection": map[string]interface{}{ + "owner_uuid": project.UUID, + "name": "test collection", + "trash_at": time.Now().Add(time.Hour)}}) + if err != nil { + return err + } + diag.debugf("ok, uuid = %s", collection.UUID) + return nil + }) + + if collection.UUID != "" { + defer func() { + diag.dotest(9990, "deleting temporary collection", func() error { + ctx, cancel := 
context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + return client.RequestAndDecodeContext(ctx, nil, "DELETE", "arvados/v1/collections/"+collection.UUID, nil, nil) + }) + }() + } + + diag.dotest(100, "uploading file via webdav", func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + if collection.UUID == "" { + return fmt.Errorf("skipping, no test collection") + } + req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/testfile", bytes.NewBufferString("testfiledata")) + if err != nil { + return fmt.Errorf("BUG? http.NewRequest: %s", err) + } + req.Header.Set("Authorization", "Bearer "+client.AuthToken) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return fmt.Errorf("error performing http request: %s", err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusCreated { + return fmt.Errorf("status %s", resp.Status) + } + diag.debugf("ok, status %s", resp.Status) + err = client.RequestAndDecodeContext(ctx, &collection, "GET", "arvados/v1/collections/"+collection.UUID, nil, nil) + if err != nil { + return fmt.Errorf("get updated collection: %s", err) + } + diag.debugf("ok, pdh %s", collection.PortableDataHash) + return nil + }) + + davurl := cluster.Services.WebDAV.ExternalURL + diag.dotest(110, fmt.Sprintf("checking WebDAV ExternalURL wildcard (%s)", davurl), func() error { + if davurl.Host == "" { + return fmt.Errorf("host missing - content previews will not work") + } + if !strings.HasPrefix(davurl.Host, "*--") && !strings.HasPrefix(davurl.Host, "*.") && !cluster.Collections.TrustAllContent { + diag.warnf("WebDAV ExternalURL has no leading wildcard and TrustAllContent==false - content previews will not work") + } + return nil + }) + + for i, trial := range []struct { + needcoll bool + status int + fileurl string + }{ + {false, http.StatusNotFound, 
strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "foo"}, + {false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "testfile"}, + {false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo"}, + {false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/testfile"}, + {true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + "testfile"}, + {true, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/testfile"}, + } { + diag.dotest(120+i, fmt.Sprintf("downloading from webdav (%s)", trial.fileurl), func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + if trial.needcoll && collection.UUID == "" { + return fmt.Errorf("skipping, no test collection") + } + req, err := http.NewRequestWithContext(ctx, "GET", trial.fileurl, nil) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+client.AuthToken) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("reading response: %s", err) + } + if resp.StatusCode != trial.status { + return fmt.Errorf("unexpected response status: %s", resp.Status) + } + if trial.status == http.StatusOK && string(body) != "testfiledata" { + return fmt.Errorf("unexpected response content: %q", body) + } + return nil + }) + } + + var vm arvados.VirtualMachine + diag.dotest(130, "getting list of virtual machines", func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + var vmlist arvados.VirtualMachineList + err := 
client.RequestAndDecodeContext(ctx, &vmlist, "GET", "arvados/v1/virtual_machines", nil, arvados.ListOptions{Limit: 999999}) + if err != nil { + return err + } + if len(vmlist.Items) < 1 { + return fmt.Errorf("no VMs found") + } + vm = vmlist.Items[0] + return nil + }) + + diag.dotest(140, "getting workbench1 webshell page", func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + if vm.UUID == "" { + return fmt.Errorf("skipping, no vm available") + } + webshelltermurl := cluster.Services.Workbench1.ExternalURL.String() + "virtual_machines/" + vm.UUID + "/webshell/testusername" + diag.debugf("url %s", webshelltermurl) + req, err := http.NewRequestWithContext(ctx, "GET", webshelltermurl, nil) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+client.AuthToken) + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("reading response: %s", err) + } + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected response status: %s %q", resp.Status, body) + } + return nil + }) + + diag.dotest(150, "connecting to webshell service", func() error { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + if vm.UUID == "" { + return fmt.Errorf("skipping, no vm available") + } + u := cluster.Services.WebShell.ExternalURL + webshellurl := u.String() + vm.Hostname + "?" + if strings.HasPrefix(u.Host, "*") { + u.Host = vm.Hostname + u.Host[1:] + webshellurl = u.String() + "?" 
+ } + diag.debugf("url %s", webshellurl) + req, err := http.NewRequestWithContext(ctx, "POST", webshellurl, bytes.NewBufferString(url.Values{ + "width": {"80"}, + "height": {"25"}, + "session": {"xyzzy"}, + "rooturl": {webshellurl}, + }.Encode())) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + diag.debugf("response status %s", resp.Status) + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("reading response: %s", err) + } + diag.debugf("response body %q", body) + // We don't speak the protocol, so we get a 400 error + // from the webshell server even if everything is + // OK. Anything else (404, 502, ???) indicates a + // problem. + if resp.StatusCode != http.StatusBadRequest { + return fmt.Errorf("unexpected response status: %s, %q", resp.Status, body) + } + return nil + }) + + diag.dotest(160, "running a container", func() error { + if diag.priority < 1 { + diag.infof("skipping (use priority > 0 if you want to run a container)") + return nil + } + if project.UUID == "" { + return fmt.Errorf("skipping, no project to work in") + } + + var cr arvados.ContainerRequest + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + defer cancel() + + timestamp := time.Now().Format(time.RFC3339) + err := client.RequestAndDecodeContext(ctx, &cr, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{"container_request": map[string]interface{}{ + "owner_uuid": project.UUID, + "name": fmt.Sprintf("diagnostics container request %s", timestamp), + "container_image": "arvados/jobs", + "command": []string{"echo", timestamp}, + "use_existing": false, + "output_path": "/mnt/output", + "output_name": fmt.Sprintf("diagnostics output %s", timestamp), + "priority": diag.priority, + "state": arvados.ContainerRequestStateCommitted, + 
"mounts": map[string]map[string]interface{}{ + "/mnt/output": { + "kind": "collection", + "writable": true, + }, + }, + "runtime_constraints": arvados.RuntimeConstraints{ + VCPUs: 1, + RAM: 1 << 26, + KeepCacheRAM: 1 << 26, + }, + }}) + if err != nil { + return err + } + diag.debugf("container request uuid = %s", cr.UUID) + diag.debugf("container uuid = %s", cr.ContainerUUID) + + timeout := 10 * time.Minute + diag.infof("container request submitted, waiting up to %v for container to run", arvados.Duration(timeout)) + ctx, cancel = context.WithDeadline(context.Background(), time.Now().Add(timeout)) + defer cancel() + + var c arvados.Container + for ; cr.State != arvados.ContainerRequestStateFinal; time.Sleep(2 * time.Second) { + ctx, cancel := context.WithDeadline(ctx, time.Now().Add(diag.timeout)) + defer cancel() + + crStateWas := cr.State + err := client.RequestAndDecodeContext(ctx, &cr, "GET", "arvados/v1/container_requests/"+cr.UUID, nil, nil) + if err != nil { + return err + } + if cr.State != crStateWas { + diag.debugf("container request state = %s", cr.State) + } + + cStateWas := c.State + err = client.RequestAndDecodeContext(ctx, &c, "GET", "arvados/v1/containers/"+cr.ContainerUUID, nil, nil) + if err != nil { + return err + } + if c.State != cStateWas { + diag.debugf("container state = %s", c.State) + } + } + + if c.State != arvados.ContainerStateComplete { + return fmt.Errorf("container request %s is final but container %s did not complete: container state = %q", cr.UUID, cr.ContainerUUID, c.State) + } else if c.ExitCode != 0 { + return fmt.Errorf("container exited %d", c.ExitCode) + } + return nil + }) +} diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py index a9361a85f9..8917692351 100644 --- a/sdk/cwl/arvados_cwl/arvtool.py +++ b/sdk/cwl/arvados_cwl/arvtool.py @@ -15,7 +15,16 @@ def validate_cluster_target(arvrunner, runtimeContext): runtimeContext.submit_runner_cluster not in arvrunner.api._rootDesc["remoteHosts"] and 
runtimeContext.submit_runner_cluster != arvrunner.api._rootDesc["uuidPrefix"]): raise WorkflowException("Unknown or invalid cluster id '%s' known remote clusters are %s" % (runtimeContext.submit_runner_cluster, - ", ".join(list(arvrunner.api._rootDesc["remoteHosts"].keys())))) + ", ".join(list(arvrunner.api._rootDesc["remoteHosts"].keys())))) + if runtimeContext.project_uuid: + cluster_target = runtimeContext.submit_runner_cluster or arvrunner.api._rootDesc["uuidPrefix"] + if not runtimeContext.project_uuid.startswith(cluster_target): + raise WorkflowException("Project uuid '%s' must be for target cluster '%s'" % (runtimeContext.project_uuid, cluster_target)) + try: + arvrunner.api.groups().get(uuid=runtimeContext.project_uuid).execute() + except Exception as e: + raise WorkflowException("Invalid project uuid '%s': %s" % (runtimeContext.project_uuid, e)) + def set_cluster_target(tool, arvrunner, builder, runtimeContext): cluster_target_req = None for field in ("hints", "requirements"): diff --git a/sdk/cwl/arvados_cwl/fsaccess.py b/sdk/cwl/arvados_cwl/fsaccess.py index 1e339d5bb7..4da8f85569 100644 --- a/sdk/cwl/arvados_cwl/fsaccess.py +++ b/sdk/cwl/arvados_cwl/fsaccess.py @@ -129,7 +129,7 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess): def glob(self, pattern): collection, rest = self.get_collection(pattern) - if collection is not None and not rest: + if collection is not None and rest in (None, "", "."): return [pattern] patternsegments = rest.split("/") return sorted(self._match(collection, patternsegments, "keep:" + collection.manifest_locator())) diff --git a/sdk/cwl/tests/17801-runtime-outdir.cwl b/sdk/cwl/tests/17801-runtime-outdir.cwl new file mode 100644 index 0000000000..c01ef057d9 --- /dev/null +++ b/sdk/cwl/tests/17801-runtime-outdir.cwl @@ -0,0 +1,15 @@ +# Copyright (C) The Arvados Authors. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +cwlVersion: v1.2 +class: CommandLineTool +inputs: [] +outputs: + stuff: + type: Directory + outputBinding: + glob: $(runtime.outdir) +requirements: + ShellCommandRequirement: {} +arguments: [{shellQuote: false, valueFrom: "mkdir -p foo && touch baz.txt && touch foo/bar.txt"}] diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml index 41feb7796b..b22c9aaa27 100644 --- a/sdk/cwl/tests/arvados-tests.yml +++ b/sdk/cwl/tests/arvados-tests.yml @@ -393,3 +393,37 @@ } tool: 10380-trailing-slash-dir.cwl doc: "Test issue 10380 - bug with trailing slash when capturing an output directory" + +- job: null + output: { + "stuff": { + "basename": "78f3957c41d044352303a3fa326dff1e+102", + "class": "Directory", + "listing": [ + { + "basename": "baz.txt", + "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709", + "class": "File", + "location": "78f3957c41d044352303a3fa326dff1e+102/baz.txt", + "size": 0 + }, + { + "basename": "foo", + "class": "Directory", + "listing": [ + { + "basename": "bar.txt", + "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709", + "class": "File", + "location": "78f3957c41d044352303a3fa326dff1e+102/foo/bar.txt", + "size": 0 + } + ], + "location": "78f3957c41d044352303a3fa326dff1e+102/foo" + } + ], + "location": "78f3957c41d044352303a3fa326dff1e+102" + } + } + tool: 17801-runtime-outdir.cwl + doc: "Test issue 17801 - bug using $(runtime.outdir) to capture the output directory" diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py index 05515e0980..c448f218bc 100644 --- a/sdk/cwl/tests/test_submit.py +++ b/sdk/cwl/tests/test_submit.py @@ -1263,6 +1263,21 @@ class TestSubmit(unittest.TestCase): stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client) self.assertEqual(exited, 1) + @stubs + def test_submit_validate_project_uuid(self, stubs): + exited = arvados_cwl.main( + ["--submit", "--no-wait", "--api=containers", "--debug", 
"--project-uuid=zzzzb-j7d0g-zzzzzzzzzzzzzzz", + "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"], + stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client) + self.assertEqual(exited, 1) + + stubs.api.groups().get().execute.side_effect = Exception("Bad project") + exited = arvados_cwl.main( + ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzz-j7d0g-zzzzzzzzzzzzzzx", + "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"], + stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client) + self.assertEqual(exited, 1) + @mock.patch("arvados.collection.CollectionReader") @stubs def test_submit_uuid_inputs(self, stubs, collectionReader): diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index 403d501b41..6e59828a3c 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -68,6 +68,16 @@ type WebDAVCacheConfig struct { MaxSessions int } +type UploadDownloadPermission struct { + Upload bool + Download bool +} + +type UploadDownloadRolePermissions struct { + User UploadDownloadPermission + Admin UploadDownloadPermission +} + type Cluster struct { ClusterID string `json:"-"` ManagementToken string @@ -130,6 +140,10 @@ type Cluster struct { BalanceTimeout Duration WebDAVCache WebDAVCacheConfig + + KeepproxyPermission UploadDownloadRolePermissions + WebDAVPermission UploadDownloadRolePermissions + WebDAVLogEvents bool } Git struct { GitCommand string @@ -176,11 +190,6 @@ type Cluster struct { Service string DefaultEmailDomain string } - SSO struct { - Enable bool - ProviderAppID string - ProviderAppSecret string - } Test struct { Enable bool Users map[string]TestUser @@ -327,7 +336,6 @@ type Services struct { Keepproxy Service Keepstore Service RailsAPI Service - SSO Service WebDAVDownload Service WebDAV Service WebShell Service diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index 2478641df5..65c207162b 100644 --- 
a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -29,12 +29,29 @@ var ( ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory") ErrNotADirectory = errors.New("not a directory") ErrPermission = os.ErrPermission + DebugLocksPanicMode = false ) type syncer interface { Sync() error } +func debugPanicIfNotLocked(l sync.Locker) { + if !DebugLocksPanicMode { + return + } + race := false + go func() { + l.Lock() + race = true + l.Unlock() + }() + time.Sleep(10) + if race { + panic("bug: caller-must-have-lock func called, but nobody has lock") + } +} + // A File is an *os.File-like interface for reading and writing files // in a FileSystem. type File interface { @@ -271,6 +288,7 @@ func (n *treenode) IsDir() bool { } func (n *treenode) Child(name string, replace func(inode) (inode, error)) (child inode, err error) { + debugPanicIfNotLocked(n) child = n.inodes[name] if name == "" || name == "." || name == ".." { err = ErrInvalidArgument @@ -333,6 +351,7 @@ func (n *treenode) Sync() error { func (n *treenode) MemorySize() (size int64) { n.RLock() defer n.RUnlock() + debugPanicIfNotLocked(n) for _, inode := range n.inodes { size += inode.MemorySize() } diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index 0233826a72..b743ab368e 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -674,6 +674,7 @@ func (dn *dirnode) Child(name string, replace func(inode) (inode, error)) (inode if err != nil { return nil, err } + coll.UUID = dn.fs.uuid data, err := json.Marshal(&coll) if err == nil { data = append(data, '\n') @@ -1167,9 +1168,12 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) { node = node.Parent() continue } + modtime := node.Parent().FileInfo().ModTime() + node.Lock() + locked := node node, err = node.Child(name, func(child inode) (inode, error) { if child == nil { - child, err := node.FS().newNode(name, 0755|os.ModeDir, 
node.Parent().FileInfo().ModTime()) + child, err := node.FS().newNode(name, 0755|os.ModeDir, modtime) if err != nil { return nil, err } @@ -1181,6 +1185,7 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) { return child, nil } }) + locked.Unlock() if err != nil { return } @@ -1191,10 +1196,13 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) { err = fmt.Errorf("invalid file part %q in path %q", basename, path) return } + modtime := node.FileInfo().ModTime() + node.Lock() + defer node.Unlock() _, err = node.Child(basename, func(child inode) (inode, error) { switch child := child.(type) { case nil: - child, err = node.FS().newNode(basename, 0755, node.FileInfo().ModTime()) + child, err = node.FS().newNode(basename, 0755, modtime) if err != nil { return nil, err } diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 900893aa36..5225df59ee 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -54,6 +54,8 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { } func (fs *customFileSystem) MountByID(mount string) { + fs.root.treenode.Lock() + defer fs.root.treenode.Unlock() fs.root.treenode.Child(mount, func(inode) (inode, error) { return &vdirnode{ treenode: treenode{ @@ -72,12 +74,16 @@ func (fs *customFileSystem) MountByID(mount string) { } func (fs *customFileSystem) MountProject(mount, uuid string) { + fs.root.treenode.Lock() + defer fs.root.treenode.Unlock() fs.root.treenode.Child(mount, func(inode) (inode, error) { return fs.newProjectNode(fs.root, mount, uuid), nil }) } func (fs *customFileSystem) MountUsers(mount string) { + fs.root.treenode.Lock() + defer fs.root.treenode.Unlock() fs.root.treenode.Child(mount, func(inode) (inode, error) { return &lookupnode{ stale: fs.Stale, diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go index b1c627f89c..dc432114a6 100644 --- a/sdk/go/arvados/fs_site_test.go +++ 
b/sdk/go/arvados/fs_site_test.go @@ -32,6 +32,15 @@ const ( var _ = check.Suite(&SiteFSSuite{}) +func init() { + // Enable DebugLocksPanicMode sometimes. Don't enable it all + // the time, though -- it adds many calls to time.Sleep(), + // which could hide different bugs. + if time.Now().Second()&1 == 0 { + DebugLocksPanicMode = true + } +} + type SiteFSSuite struct { client *Client fs CustomFileSystem diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go index a4d7e88b23..4b7ad6dd59 100644 --- a/sdk/go/arvadostest/fixtures.go +++ b/sdk/go/arvadostest/fixtures.go @@ -10,6 +10,7 @@ const ( ActiveToken = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi" ActiveTokenUUID = "zzzzz-gj3su-077z32aux8dg2s1" ActiveTokenV2 = "v2/zzzzz-gj3su-077z32aux8dg2s1/3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi" + AdminUserUUID = "zzzzz-tpzed-d9tiejq69daie8f" AdminToken = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h" AdminTokenUUID = "zzzzz-gj3su-027z32aux8dg2s1" AnonymousToken = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi" @@ -30,6 +31,8 @@ const ( UserAgreementPDH = "b519d9cb706a29fc7ea24dbea2f05851+93" HelloWorldPdh = "55713e6a34081eb03609e7ad5fcad129+62" + MultilevelCollection1 = "zzzzz-4zz18-pyw8yp9g3pr7irn" + AProjectUUID = "zzzzz-j7d0g-v955i6s2oi1cbso" ASubprojectUUID = "zzzzz-j7d0g-axqo7eu9pwvna1x" diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index 9596a2dc2d..ad04807712 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -173,7 +173,8 @@ Follow file and directory symlinks (default). """) _group.add_argument('--no-follow-links', action='store_false', dest='follow_links', help=""" -Do not follow file and directory symlinks. +Ignore file and directory symlinks. Even paths given explicitly on the +command line will be skipped if they are symlinks. 
""") @@ -259,9 +260,8 @@ def parse_arguments(arguments): args.paths = ["-" if x == "/dev/stdin" else x for x in args.paths] - if len(args.paths) != 1 or os.path.isdir(args.paths[0]): - if args.filename: - arg_parser.error(""" + if args.filename and (len(args.paths) != 1 or os.path.isdir(args.paths[0])): + arg_parser.error(""" --filename argument cannot be used when storing a directory or multiple files. """) @@ -525,6 +525,9 @@ class ArvPutUploadJob(object): self._write_stdin(self.filename or 'stdin') elif not os.path.exists(path): raise PathDoesNotExistError(u"file or directory '{}' does not exist.".format(path)) + elif (not self.follow_links) and os.path.islink(path): + self.logger.warning("Skipping symlink '{}'".format(path)) + continue elif os.path.isdir(path): # Use absolute paths on cache index so CWD doesn't interfere # with the caching logic. @@ -656,15 +659,14 @@ class ArvPutUploadJob(object): else: # The file already exist on remote collection, skip it. pass - self._remote_collection.save(storage_classes=self.storage_classes, - num_retries=self.num_retries, + self._remote_collection.save(num_retries=self.num_retries, trash_at=self._collection_trash_at()) else: - if self.storage_classes is None: - self.storage_classes = ['default'] + if len(self._local_collection) == 0: + self.logger.warning("No files were uploaded, skipping collection creation.") + return self._local_collection.save_new( name=self.name, owner_uuid=self.owner_uuid, - storage_classes=self.storage_classes, ensure_unique_name=self.ensure_unique_name, num_retries=self.num_retries, trash_at=self._collection_trash_at()) @@ -868,6 +870,7 @@ class ArvPutUploadJob(object): self._remote_collection = arvados.collection.Collection( update_collection, api_client=self._api_client, + storage_classes_desired=self.storage_classes, num_retries=self.num_retries) except arvados.errors.ApiError as error: raise CollectionUpdateError("Cannot read collection {} ({})".format(update_collection, error)) @@ -910,6 
+913,7 @@ class ArvPutUploadJob(object): self._local_collection = arvados.collection.Collection( self._state['manifest'], replication_desired=self.replication_desired, + storage_classes_desired=(self.storage_classes or ['default']), put_threads=self.put_threads, api_client=self._api_client, num_retries=self.num_retries) @@ -1197,11 +1201,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, # Split storage-classes argument storage_classes = None if args.storage_classes: - storage_classes = args.storage_classes.strip().split(',') - if len(storage_classes) > 1: - logger.error("Multiple storage classes are not supported currently.") - sys.exit(1) - + storage_classes = args.storage_classes.strip().replace(' ', '').split(',') # Setup exclude regex from all the --exclude arguments provided name_patterns = [] @@ -1301,7 +1301,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, output = None try: writer.start(save_collection=not(args.stream or args.raw)) - except arvados.errors.ApiError as error: + except (arvados.errors.ApiError, arvados.errors.KeepWriteError) as error: logger.error("\n".join([ "arv-put: %s" % str(error)])) sys.exit(1) @@ -1316,7 +1316,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, output = writer.manifest_text() elif args.raw: output = ','.join(writer.data_locators()) - else: + elif writer.manifest_locator() is not None: try: expiration_notice = "" if writer.collection_trash_at() is not None: @@ -1342,6 +1342,8 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, "arv-put: Error creating Collection on project: {}.".format( error)) status = 1 + else: + status = 1 # Print the locator (uuid) of the new collection. 
if output is None: diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py index 917d6100ae..c022e6c874 100644 --- a/sdk/python/tests/run_test_server.py +++ b/sdk/python/tests/run_test_server.py @@ -755,9 +755,6 @@ def setup_config(): "http://%s:%s"%(localhost, keep_web_dl_port): {}, }, }, - "SSO": { - "ExternalURL": "http://localhost:3002", - }, } config = { @@ -769,10 +766,14 @@ def setup_config(): "RequestTimeout": "30s", }, "Login": { - "SSO": { + "Test": { "Enable": True, - "ProviderAppID": "arvados-server", - "ProviderAppSecret": "608dbf356a327e2d0d4932b60161e212c2d8d8f5e25690d7b622f850a990cd33", + "Users": { + "alice": { + "Email": "alice@example.com", + "Password": "xyzzy" + } + } }, }, "SystemLogs": { diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index e75d39d879..fac970c95a 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -295,6 +295,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, shutil.rmtree(self.tempdir_with_symlink) def test_symlinks_are_followed_by_default(self): + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile'))) cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink]) cwriter.start(save_collection=False) self.assertIn('linkeddir', cwriter.manifest_text()) @@ -302,12 +304,29 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, cwriter.destroy_cache() def test_symlinks_are_not_followed_when_requested(self): + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile'))) cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink], follow_links=False) cwriter.start(save_collection=False) self.assertNotIn('linkeddir', cwriter.manifest_text()) self.assertNotIn('linkedfile', 
cwriter.manifest_text()) cwriter.destroy_cache() + # Check for bug #17800: passed symlinks should also be ignored. + linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir') + cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False) + cwriter.start(save_collection=False) + self.assertNotIn('linkeddir', cwriter.manifest_text()) + cwriter.destroy_cache() + + def test_no_empty_collection_saved(self): + self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir'))) + linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir') + cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False) + cwriter.start(save_collection=True) + self.assertIsNone(cwriter.manifest_locator()) + self.assertEqual('', cwriter.manifest_text()) + cwriter.destroy_cache() def test_passing_nonexistant_path_raise_exception(self): uuid_str = str(uuid.uuid4()) @@ -813,11 +832,6 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, self.call_main_with_args, ['--project-uuid', self.Z_UUID, '--stream']) - def test_error_when_multiple_storage_classes_specified(self): - self.assertRaises(SystemExit, - self.call_main_with_args, - ['--storage-classes', 'hot,cold']) - def test_error_when_excluding_absolute_path(self): tmpdir = self.make_tmpdir() self.assertRaises(SystemExit, @@ -1315,13 +1329,16 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, def test_put_collection_with_storage_classes_specified(self): collection = self.run_and_find_collection("", ['--storage-classes', 'hot']) - self.assertEqual(len(collection['storage_classes_desired']), 1) self.assertEqual(collection['storage_classes_desired'][0], 'hot') + def test_put_collection_with_multiple_storage_classes_specified(self): + collection = self.run_and_find_collection("", ['--storage-classes', ' foo, bar ,baz']) + self.assertEqual(len(collection['storage_classes_desired']), 3) + self.assertEqual(collection['storage_classes_desired'], ['foo', 'bar', 'baz']) + def 
test_put_collection_without_storage_classes_specified(self): collection = self.run_and_find_collection("") - self.assertEqual(len(collection['storage_classes_desired']), 1) self.assertEqual(collection['storage_classes_desired'][0], 'default') diff --git a/services/api/Gemfile b/services/api/Gemfile index ae16581236..39ce5def17 100644 --- a/services/api/Gemfile +++ b/services/api/Gemfile @@ -25,9 +25,6 @@ group :test, :development do gem 'listen' end -# Fast app boot times -gem 'bootsnap', require: false - gem 'pg', '~> 1.0' gem 'multi_json' @@ -47,10 +44,6 @@ gem 'passenger' # Locking to 5.10.3 to workaround issue in 5.11.1 (https://github.com/seattlerb/minitest/issues/730) gem 'minitest', '5.10.3' -# Restricted because omniauth >= 1.5.0 requires Ruby >= 2.1.9: -gem 'omniauth', '~> 1.4.0' -gem 'omniauth-oauth2', '~> 1.1' - gem 'andand' gem 'optimist' diff --git a/services/api/Gemfile.lock b/services/api/Gemfile.lock index 6a26ffd494..ddecd4a18a 100644 --- a/services/api/Gemfile.lock +++ b/services/api/Gemfile.lock @@ -8,43 +8,43 @@ GIT GEM remote: https://rubygems.org/ specs: - actioncable (5.2.4.5) - actionpack (= 5.2.4.5) + actioncable (5.2.6) + actionpack (= 5.2.6) nio4r (~> 2.0) websocket-driver (>= 0.6.1) - actionmailer (5.2.4.5) - actionpack (= 5.2.4.5) - actionview (= 5.2.4.5) - activejob (= 5.2.4.5) + actionmailer (5.2.6) + actionpack (= 5.2.6) + actionview (= 5.2.6) + activejob (= 5.2.6) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (5.2.4.5) - actionview (= 5.2.4.5) - activesupport (= 5.2.4.5) + actionpack (5.2.6) + actionview (= 5.2.6) + activesupport (= 5.2.6) rack (~> 2.0, >= 2.0.8) rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.2) - actionview (5.2.4.5) - activesupport (= 5.2.4.5) + actionview (5.2.6) + activesupport (= 5.2.6) builder (~> 3.1) erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.3) - activejob (5.2.4.5) - activesupport (= 5.2.4.5) + activejob (5.2.6) + 
activesupport (= 5.2.6) globalid (>= 0.3.6) - activemodel (5.2.4.5) - activesupport (= 5.2.4.5) - activerecord (5.2.4.5) - activemodel (= 5.2.4.5) - activesupport (= 5.2.4.5) + activemodel (5.2.6) + activesupport (= 5.2.6) + activerecord (5.2.6) + activemodel (= 5.2.6) + activesupport (= 5.2.6) arel (>= 9.0) - activestorage (5.2.4.5) - actionpack (= 5.2.4.5) - activerecord (= 5.2.4.5) - marcel (~> 0.3.1) - activesupport (5.2.4.5) + activestorage (5.2.6) + actionpack (= 5.2.6) + activerecord (= 5.2.6) + marcel (~> 1.0.0) + activesupport (5.2.6) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 0.7, < 2) minitest (~> 5.1) @@ -80,8 +80,6 @@ GEM addressable (>= 2.3.1) extlib (>= 0.9.15) multi_json (>= 1.0.0) - bootsnap (1.4.7) - msgpack (~> 1.0) builder (3.2.4) byebug (11.0.1) capistrano (2.15.9) @@ -90,7 +88,7 @@ GEM net-sftp (>= 2.0.0) net-ssh (>= 2.0.14) net-ssh-gateway (>= 1.1.0) - concurrent-ruby (1.1.8) + concurrent-ruby (1.1.9) crass (1.0.6) erubi (1.10.0) execjs (2.7.0) @@ -112,7 +110,6 @@ GEM multi_json (~> 1.11) os (>= 0.9, < 2.0) signet (~> 0.7) - hashie (3.6.0) highline (2.0.1) httpclient (2.8.3) i18n (0.9.5) @@ -135,26 +132,21 @@ GEM railties (>= 4) request_store (~> 1.0) logstash-event (1.2.02) - loofah (2.9.0) + loofah (2.10.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.1) mini_mime (>= 0.1.1) - marcel (0.3.3) - mimemagic (~> 0.3.2) + marcel (1.0.1) memoist (0.16.2) metaclass (0.0.4) method_source (1.0.0) - mimemagic (0.3.8) - nokogiri (~> 1) - mini_mime (1.0.2) - mini_portile2 (2.5.1) + mini_mime (1.1.0) + mini_portile2 (2.5.3) minitest (5.10.3) mocha (1.8.0) metaclass (~> 0.0.1) - msgpack (1.3.3) multi_json (1.15.0) - multi_xml (0.6.0) multipart-post (2.1.1) net-scp (2.0.0) net-ssh (>= 2.6.5, < 6.0.0) @@ -164,22 +156,10 @@ GEM net-ssh-gateway (2.0.0) net-ssh (>= 4.0.0) nio4r (2.5.7) - nokogiri (1.11.5) + nokogiri (1.11.7) mini_portile2 (~> 2.5.0) racc (~> 1.4) - oauth2 (1.4.1) - faraday (>= 0.8, < 0.16.0) - jwt (>= 1.0, < 3.0) - multi_json (~> 1.3) - 
multi_xml (~> 0.5) - rack (>= 1.2, < 3) oj (3.9.2) - omniauth (1.4.3) - hashie (>= 1.2, < 4) - rack (>= 1.6.2, < 3) - omniauth-oauth2 (1.5.0) - oauth2 (~> 1.1) - omniauth (~> 1.2) optimist (3.0.0) os (1.1.1) passenger (6.0.2) @@ -192,18 +172,18 @@ GEM rack (2.2.3) rack-test (1.1.0) rack (>= 1.0, < 3) - rails (5.2.4.5) - actioncable (= 5.2.4.5) - actionmailer (= 5.2.4.5) - actionpack (= 5.2.4.5) - actionview (= 5.2.4.5) - activejob (= 5.2.4.5) - activemodel (= 5.2.4.5) - activerecord (= 5.2.4.5) - activestorage (= 5.2.4.5) - activesupport (= 5.2.4.5) + rails (5.2.6) + actioncable (= 5.2.6) + actionmailer (= 5.2.6) + actionpack (= 5.2.6) + actionview (= 5.2.6) + activejob (= 5.2.6) + activemodel (= 5.2.6) + activerecord (= 5.2.6) + activestorage (= 5.2.6) + activesupport (= 5.2.6) bundler (>= 1.3.0) - railties (= 5.2.4.5) + railties (= 5.2.6) sprockets-rails (>= 2.0.0) rails-controller-testing (1.0.4) actionpack (>= 5.0.1.x) @@ -217,9 +197,9 @@ GEM rails-observers (0.1.5) activemodel (>= 4.0) rails-perftest (0.0.7) - railties (5.2.4.5) - actionpack (= 5.2.4.5) - activesupport (= 5.2.4.5) + railties (5.2.6) + actionpack (= 5.2.6) + activesupport (= 5.2.6) method_source rake (>= 0.8.7) thor (>= 0.19.0, < 2.0) @@ -281,7 +261,7 @@ GEM uglifier (2.7.2) execjs (>= 0.3.0) json (>= 1.8.0) - websocket-driver (0.7.3) + websocket-driver (0.7.4) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) @@ -292,7 +272,6 @@ DEPENDENCIES acts_as_api andand arvados (~> 2.1.5) - bootsnap byebug factory_bot_rails httpclient @@ -304,8 +283,6 @@ DEPENDENCIES mocha multi_json oj - omniauth (~> 1.4.0) - omniauth-oauth2 (~> 1.1) optimist passenger pg (~> 1.0) @@ -328,4 +305,4 @@ DEPENDENCIES uglifier (~> 2.0) BUNDLED WITH - 1.17.3 + 2.2.19 diff --git a/services/api/app/assets/stylesheets/application.css b/services/api/app/assets/stylesheets/application.css index 721ff801c9..18895f6efc 100644 --- a/services/api/app/assets/stylesheets/application.css +++ 
b/services/api/app/assets/stylesheets/application.css @@ -62,7 +62,7 @@ div#header span.beta > span { border-bottom: 1px solid #fff; font-size: 0.8em; } -img.curoverse-logo { +img.arvados-logo { height: 66px; } #intropage { diff --git a/services/api/app/controllers/application_controller.rb b/services/api/app/controllers/application_controller.rb index e1ae76ed29..fc33dde447 100644 --- a/services/api/app/controllers/application_controller.rb +++ b/services/api/app/controllers/application_controller.rb @@ -397,7 +397,7 @@ class ApplicationController < ActionController::Base if not current_user respond_to do |format| format.json { send_error("Not logged in", status: 401) } - format.html { redirect_to '/auth/joshid' } + format.html { redirect_to '/login' } end false end diff --git a/services/api/app/controllers/user_sessions_controller.rb b/services/api/app/controllers/user_sessions_controller.rb index 8e9a26b7a8..ae34fa7600 100644 --- a/services/api/app/controllers/user_sessions_controller.rb +++ b/services/api/app/controllers/user_sessions_controller.rb @@ -11,7 +11,7 @@ class UserSessionsController < ApplicationController respond_to :html - # omniauth callback method + # create a new session def create if !Rails.configuration.Login.LoginCluster.empty? and Rails.configuration.Login.LoginCluster != Rails.configuration.ClusterID raise "Local login disabled when LoginCluster is set" @@ -27,9 +27,7 @@ class UserSessionsController < ApplicationController authinfo = SafeJSON.load(params[:auth_info]) max_expires_at = authinfo["expires_at"] else - # omniauth middleware verified the user and is passing auth_info - # in request.env. - authinfo = request.env['omniauth.auth']['info'].with_indifferent_access + return send_error "Legacy code path no longer supported", status: 404 end if !authinfo['user_uuid'].blank? 
@@ -92,19 +90,17 @@ class UserSessionsController < ApplicationController flash[:notice] = params[:message] end - # logout - Clear our rack session BUT essentially redirect to the provider - # to clean up the Devise session from there too ! + # logout - this gets intercepted by controller, so this is probably + # mostly dead code at this point. def logout session[:user_id] = nil flash[:notice] = 'You have logged off' return_to = params[:return_to] || root_url - redirect_to "#{Rails.configuration.Services.SSO.ExternalURL}users/sign_out?redirect_uri=#{CGI.escape return_to}" + redirect_to return_to end - # login - Just bounce to /auth/joshid. The only purpose of this function is - # to save the return_to parameter (if it exists; see the application - # controller). /auth/joshid bypasses the application controller. + # login. Redirect to LoginCluster. def login if params[:remote] !~ /^[0-9a-z]{5}$/ && !params[:remote].nil? return send_error 'Invalid remote cluster id', status: 400 @@ -136,13 +132,7 @@ class UserSessionsController < ApplicationController p << "return_to=#{CGI.escape(params[:return_to])}" if params[:return_to] redirect_to "#{login_cluster}/login?#{p.join('&')}" else - if params[:return_to] - # Encode remote param inside callback's return_to, so that we'll get it on - # create() after login. - remote_param = params[:remote].nil? ? '' : params[:remote] - p << "return_to=#{CGI.escape(remote_param + ',' + params[:return_to])}" - end - redirect_to "/auth/joshid?#{p.join('&')}" + return send_error "Legacy code path no longer supported", status: 404 end end diff --git a/services/api/app/models/container_request.rb b/services/api/app/models/container_request.rb index 837f2cdc70..e712acc6e9 100644 --- a/services/api/app/models/container_request.rb +++ b/services/api/app/models/container_request.rb @@ -394,6 +394,20 @@ class ContainerRequest < ArvadosModel if self.new_record? || self.state_was == Uncommitted # Allow create-and-commit in a single operation. 
permitted.push(*AttrsPermittedBeforeCommit) + elsif mounts_changed? && mounts_was.keys == mounts.keys + # Ignore the updated mounts if the only changes are default/zero + # values as added by controller, see 17774 + only_defaults = true + mounts.each do |path, mount| + (mount.to_a - mounts_was[path].to_a).each do |k, v| + if !["", false, nil].index(v) + only_defaults = false + end + end + end + if only_defaults + clear_attribute_change("mounts") + end end case self.state diff --git a/services/api/app/views/layouts/application.html.erb b/services/api/app/views/layouts/application.html.erb index a99b6f165d..b4f60de346 100644 --- a/services/api/app/views/layouts/application.html.erb +++ b/services/api/app/views/layouts/application.html.erb @@ -23,8 +23,6 @@ SPDX-License-Identifier: AGPL-3.0 %> <% end %>  •  Log out - <% else %> - <% end %> <% if current_user and session[:real_uid] and session[:switch_back_to] and User.find(session[:real_uid].to_i).verify_userswitch_cookie(session[:switch_back_to]) %> @@ -41,7 +39,6 @@ SPDX-License-Identifier: AGPL-3.0 %> <% if current_user or session['invite_code'] %> <% end %> diff --git a/services/api/app/views/static/intro.html.erb b/services/api/app/views/static/intro.html.erb index bdefaa5c1f..8bab0b2ac6 100644 --- a/services/api/app/views/static/intro.html.erb +++ b/services/api/app/views/static/intro.html.erb @@ -8,30 +8,23 @@ $(function(){ }); <% end %>
- +

Welcome

-

Curoverse ARVADOS

+

ARVADOS

<% if !current_user and session['invite_code'] %> -

Curoverse Arvados lets you manage and process human genomes and exomes. You can start using the private beta - now with your Google account.

+

Arvados lets you manage and process biomedical data.

- +

<% else %> -

Curoverse ARVADOS is transforming how researchers and - clinical geneticists use whole genome sequences.

-

If you’re interested in learning more, we’d love to hear - from you — - contact arvados@curoverse.com.

- <% if !current_user %>

- Log in here. + Log in here.

<% end %> diff --git a/services/api/app/views/static/login_failure.html.erb b/services/api/app/views/static/login_failure.html.erb index b3c6e70d90..6b81a33e87 100644 --- a/services/api/app/views/static/login_failure.html.erb +++ b/services/api/app/views/static/login_failure.html.erb @@ -10,7 +10,7 @@ $(function(){
- +

Error

diff --git a/services/api/app/views/user_sessions/failure.html.erb b/services/api/app/views/user_sessions/failure.html.erb index 81c5be27c6..e8c5b08465 100644 --- a/services/api/app/views/user_sessions/failure.html.erb +++ b/services/api/app/views/user_sessions/failure.html.erb @@ -7,4 +7,4 @@ SPDX-License-Identifier: AGPL-3.0 %> <%= notice %>
-Retry Login +Retry Login diff --git a/services/api/config/arvados_config.rb b/services/api/config/arvados_config.rb index 2c259919ae..a6f1730e86 100644 --- a/services/api/config/arvados_config.rb +++ b/services/api/config/arvados_config.rb @@ -28,22 +28,6 @@ rescue LoadError # configured by application.yml (i.e., here!) instead. end -if (File.exist?(File.expand_path '../omniauth.rb', __FILE__) and - not defined? WARNED_OMNIAUTH_CONFIG) - Rails.logger.warn <<-EOS -DEPRECATED CONFIGURATION: - Please move your SSO provider config into config/application.yml - and delete config/initializers/omniauth.rb. -EOS - # Real values will be copied from globals by omniauth_init.rb. For - # now, assign some strings so the generic *.yml config loader - # doesn't overwrite them or complain that they're missing. - Rails.configuration.Login["SSO"]["ProviderAppID"] = 'xxx' - Rails.configuration.Login["SSO"]["ProviderAppSecret"] = 'xxx' - Rails.configuration.Services["SSO"]["ExternalURL"] = '//xxx' - WARNED_OMNIAUTH_CONFIG = true -end - # Load the defaults, used by config:migrate and fallback loading # legacy application.yml defaultYAML, stderr, status = Open3.capture3("arvados-server", "config-dump", "-config=-", "-skip-legacy", stdin_data: "Clusters: {xxxxx: {}}") @@ -114,14 +98,11 @@ arvcfg.declare_config "Users.EmailSubjectPrefix", String, :email_subject_prefix arvcfg.declare_config "Users.UserNotifierEmailFrom", String, :user_notifier_email_from arvcfg.declare_config "Users.NewUserNotificationRecipients", Hash, :new_user_notification_recipients, ->(cfg, k, v) { arrayToHash cfg, "Users.NewUserNotificationRecipients", v } arvcfg.declare_config "Users.NewInactiveUserNotificationRecipients", Hash, :new_inactive_user_notification_recipients, method(:arrayToHash) -arvcfg.declare_config "Login.SSO.ProviderAppSecret", String, :sso_app_secret -arvcfg.declare_config "Login.SSO.ProviderAppID", String, :sso_app_id arvcfg.declare_config "Login.LoginCluster", String arvcfg.declare_config 
"Login.TrustedClients", Hash arvcfg.declare_config "Login.RemoteTokenRefresh", ActiveSupport::Duration arvcfg.declare_config "Login.TokenLifetime", ActiveSupport::Duration arvcfg.declare_config "TLS.Insecure", Boolean, :sso_insecure -arvcfg.declare_config "Services.SSO.ExternalURL", String, :sso_provider_url arvcfg.declare_config "AuditLogs.MaxAge", ActiveSupport::Duration, :max_audit_log_age arvcfg.declare_config "AuditLogs.MaxDeleteBatch", Integer, :max_audit_log_delete_batch arvcfg.declare_config "AuditLogs.UnloggedAttributes", Hash, :unlogged_attributes, ->(cfg, k, v) { arrayToHash cfg, "AuditLogs.UnloggedAttributes", v } diff --git a/services/api/config/boot.rb b/services/api/config/boot.rb index 9605b584e9..8087911837 100644 --- a/services/api/config/boot.rb +++ b/services/api/config/boot.rb @@ -6,4 +6,3 @@ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__) require 'bundler/setup' # Set up gems listed in the Gemfile. -require 'bootsnap/setup' # Speed up boot time by caching expensive operations. \ No newline at end of file diff --git a/services/api/config/environment.rb b/services/api/config/environment.rb index b82ba27f9a..cd706940a3 100644 --- a/services/api/config/environment.rb +++ b/services/api/config/environment.rb @@ -4,7 +4,6 @@ # Load the rails application require_relative 'application' -require 'josh_id' # Initialize the rails application Rails.application.initialize! diff --git a/services/api/config/initializers/omniauth_init.rb b/services/api/config/initializers/omniauth_init.rb deleted file mode 100644 index a1b2356bd5..0000000000 --- a/services/api/config/initializers/omniauth_init.rb +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) The Arvados Authors. All rights reserved. -# -# SPDX-License-Identifier: AGPL-3.0 - -# This file is called omniauth_init.rb instead of omniauth.rb because -# older versions had site configuration in omniauth.rb. -# -# It must come after omniauth.rb in (lexical) load order. - -if defined? 
CUSTOM_PROVIDER_URL - Rails.logger.warn "Copying omniauth from globals in legacy config file." - Rails.configuration.Login["SSO"]["ProviderAppID"] = APP_ID - Rails.configuration.Login["SSO"]["ProviderAppSecret"] = APP_SECRET - Rails.configuration.Services["SSO"]["ExternalURL"] = CUSTOM_PROVIDER_URL.sub(/\/$/, "") + "/" -else - Rails.application.config.middleware.use OmniAuth::Builder do - provider(:josh_id, - Rails.configuration.Login["SSO"]["ProviderAppID"], - Rails.configuration.Login["SSO"]["ProviderAppSecret"], - Rails.configuration.Services["SSO"]["ExternalURL"]) - end - OmniAuth.config.on_failure = StaticController.action(:login_failure) -end diff --git a/services/api/lib/josh_id.rb b/services/api/lib/josh_id.rb deleted file mode 100644 index f18c0edda0..0000000000 --- a/services/api/lib/josh_id.rb +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (C) The Arvados Authors. All rights reserved. -# -# SPDX-License-Identifier: AGPL-3.0 - -require 'omniauth-oauth2' -module OmniAuth - module Strategies - class JoshId < OmniAuth::Strategies::OAuth2 - - args [:client_id, :client_secret, :custom_provider_url] - - option :custom_provider_url, '' - - uid { raw_info['id'] } - - option :client_options, {} - - info do - { - :first_name => raw_info['info']['first_name'], - :last_name => raw_info['info']['last_name'], - :email => raw_info['info']['email'], - :identity_url => raw_info['info']['identity_url'], - :username => raw_info['info']['username'], - } - end - - extra do - { - 'raw_info' => raw_info - } - end - - def authorize_params - options.authorize_params[:auth_provider] = request.params['auth_provider'] - super - end - - def client - options.client_options[:site] = options[:custom_provider_url] - options.client_options[:authorize_url] = "#{options[:custom_provider_url]}/auth/josh_id/authorize" - options.client_options[:access_token_url] = "#{options[:custom_provider_url]}/auth/josh_id/access_token" - if Rails.configuration.TLS.Insecure - options.client_options[:ssl] = 
{verify_mode: OpenSSL::SSL::VERIFY_NONE} - end - ::OAuth2::Client.new(options.client_id, options.client_secret, deep_symbolize(options.client_options)) - end - - def callback_url - full_host + script_name + callback_path + "?return_to=" + CGI.escape(request.params['return_to'] || '') - end - - def raw_info - @raw_info ||= access_token.get("/auth/josh_id/user.json?oauth_token=#{access_token.token}").parsed - end - end - end -end diff --git a/services/api/test/fixtures/container_requests.yml b/services/api/test/fixtures/container_requests.yml index 1c62605029..5be132ac30 100644 --- a/services/api/test/fixtures/container_requests.yml +++ b/services/api/test/fixtures/container_requests.yml @@ -37,10 +37,13 @@ running: output_path: test command: ["echo", "hello"] container_uuid: zzzzz-dz642-runningcontainr + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: vcpus: 1 ram: 123 - mounts: {} requester_for_running: uuid: zzzzz-xvhdp-req4runningcntr @@ -58,10 +61,13 @@ requester_for_running: command: ["echo", "hello"] container_uuid: zzzzz-dz642-logscontainer03 requesting_container_uuid: zzzzz-dz642-runningcontainr + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: vcpus: 1 ram: 123 - mounts: {} running_older: uuid: zzzzz-xvhdp-cr4runningcntn2 @@ -78,10 +84,13 @@ running_older: output_path: test command: ["echo", "hello"] container_uuid: zzzzz-dz642-runningcontain2 + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: vcpus: 1 ram: 123 - mounts: {} completed: uuid: zzzzz-xvhdp-cr4completedctr @@ -429,10 +438,13 @@ running_anonymous_accessible: output_path: test command: ["echo", "hello"] container_uuid: zzzzz-dz642-runningcontain2 + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: vcpus: 1 ram: 123 - mounts: {} cr_for_failed: uuid: zzzzz-xvhdp-cr4failedcontnr @@ -509,10 +521,13 @@ canceled_with_running_container: output_path: test command: ["echo", "hello"] container_uuid: 
zzzzz-dz642-runningcontainr + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: vcpus: 1 ram: 123 - mounts: {} running_to_be_deleted: uuid: zzzzz-xvhdp-cr5runningcntnr @@ -528,11 +543,14 @@ running_to_be_deleted: cwd: test output_path: test command: ["echo", "hello"] + mounts: + /tmp: + kind: tmp + capacity: 24000000000 container_uuid: zzzzz-dz642-runnincntrtodel runtime_constraints: vcpus: 1 ram: 123 - mounts: {} completed_with_input_mounts: uuid: zzzzz-xvhdp-crwithinputmnts diff --git a/services/api/test/fixtures/containers.yml b/services/api/test/fixtures/containers.yml index b7d082771a..e7cd0abd1f 100644 --- a/services/api/test/fixtures/containers.yml +++ b/services/api/test/fixtures/containers.yml @@ -33,12 +33,16 @@ running: updated_at: <%= 1.minute.ago.to_s(:db) %> started_at: <%= 1.minute.ago.to_s(:db) %> container_image: test - cwd: test - output_path: test + cwd: /tmp + output_path: /tmp command: ["echo", "hello"] runtime_constraints: ram: 12000000000 vcpus: 4 + mounts: + /tmp: + kind: tmp + capacity: 24000000000 secret_mounts: /secret/6x9: kind: text @@ -55,9 +59,13 @@ running_older: updated_at: <%= 2.minute.ago.to_s(:db) %> started_at: <%= 2.minute.ago.to_s(:db) %> container_image: test - cwd: test - output_path: test + cwd: /tmp + output_path: /tmp command: ["echo", "hello"] + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: ram: 12000000000 vcpus: 4 @@ -383,6 +391,10 @@ running_container_with_logs: cwd: test output_path: test command: ["echo", "hello"] + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: ram: 12000000000 vcpus: 4 @@ -401,6 +413,10 @@ running_to_be_deleted: cwd: test output_path: test command: ["echo", "hello"] + mounts: + /tmp: + kind: tmp + capacity: 24000000000 runtime_constraints: ram: 12000000000 vcpus: 4 diff --git a/services/api/test/functional/user_sessions_controller_test.rb b/services/api/test/functional/user_sessions_controller_test.rb index 
1f91968932..66aff787bd 100644 --- a/services/api/test/functional/user_sessions_controller_test.rb +++ b/services/api/test/functional/user_sessions_controller_test.rb @@ -9,9 +9,8 @@ class UserSessionsControllerTest < ActionController::TestCase test "redirect to joshid" do api_client_page = 'http://client.example.com/home' get :login, params: {return_to: api_client_page} - assert_response :redirect - assert_equal("http://test.host/auth/joshid?return_to=%2Chttp%3A%2F%2Fclient.example.com%2Fhome", @response.redirect_url) - assert_nil assigns(:api_client) + # Not supported any more + assert_response 404 end test "send token when user is already logged in" do @@ -107,9 +106,8 @@ class UserSessionsControllerTest < ActionController::TestCase Rails.configuration.Login.LoginCluster = 'zzzzz' api_client_page = 'http://client.example.com/home' get :login, params: {return_to: api_client_page} - assert_response :redirect - assert_equal("http://test.host/auth/joshid?return_to=%2Chttp%3A%2F%2Fclient.example.com%2Fhome", @response.redirect_url) - assert_nil assigns(:api_client) + # Doesn't redirect, just fail. 
+ assert_response 404 end test "controller cannot create session without SystemRootToken" do diff --git a/services/api/test/integration/login_workflow_test.rb b/services/api/test/integration/login_workflow_test.rb index f0741fcfde..ba3b2ac6e3 100644 --- a/services/api/test/integration/login_workflow_test.rb +++ b/services/api/test/integration/login_workflow_test.rb @@ -30,7 +30,7 @@ class LoginWorkflowTest < ActionDispatch::IntegrationTest params: {specimen: {}}, headers: {'HTTP_ACCEPT' => 'text/html'}) assert_response 302 - assert_match(%r{/auth/joshid$}, @response.headers['Location'], + assert_match(%r{http://www.example.com/login$}, @response.headers['Location'], "HTML login prompt did not include expected redirect") end end diff --git a/services/api/test/integration/user_sessions_test.rb b/services/api/test/integration/user_sessions_test.rb index 6e951499ad..76659f3207 100644 --- a/services/api/test/integration/user_sessions_test.rb +++ b/services/api/test/integration/user_sessions_test.rb @@ -15,21 +15,17 @@ class UserSessionsApiTest < ActionDispatch::IntegrationTest def mock_auth_with(email: nil, username: nil, identity_url: nil, remote: nil, expected_response: :redirect) mock = { - 'provider' => 'josh_id', - 'uid' => 'https://edward.example.com', - 'info' => { 'identity_url' => 'https://edward.example.com', 'name' => 'Edward Example', 'first_name' => 'Edward', 'last_name' => 'Example', - }, } - mock['info']['email'] = email unless email.nil? - mock['info']['username'] = username unless username.nil? - mock['info']['identity_url'] = identity_url unless identity_url.nil? - post('/auth/josh_id/callback', - params: {return_to: client_url(remote: remote)}, - headers: {'omniauth.auth' => mock}) + mock['email'] = email unless email.nil? + mock['username'] = username unless username.nil? + mock['identity_url'] = identity_url unless identity_url.nil? 
+ post('/auth/controller/callback', + params: {return_to: client_url(remote: remote), :auth_info => SafeJSON.dump(mock)}, + headers: {'Authorization' => 'Bearer ' + Rails.configuration.SystemRootToken}) errors = { :redirect => 'Did not redirect to client with token', diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb index ee7dac4cd9..843d4f1b23 100644 --- a/services/api/test/test_helper.rb +++ b/services/api/test/test_helper.rb @@ -25,7 +25,6 @@ unless ENV["NO_COVERAGE_TEST"] SimpleCov.start do add_filter '/test/' add_filter 'initializers/secret_token' - add_filter 'initializers/omniauth' end rescue Exception => e $stderr.puts "SimpleCov unavailable (#{e}). Proceeding without." diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb index b2dde79956..2d5c735181 100644 --- a/services/api/test/unit/container_request_test.rb +++ b/services/api/test/unit/container_request_test.rb @@ -1071,6 +1071,31 @@ class ContainerRequestTest < ActiveSupport::TestCase ['Committed', false, {container_count: 2}], ['Committed', false, {container_count: 0}], ['Committed', false, {container_count: nil}], + ['Committed', true, {priority: 0, mounts: {"/out" => {"kind" => "tmp", "capacity" => 1000000}}}], + ['Committed', true, {priority: 0, mounts: {"/out" => {"capacity" => 1000000, "kind" => "tmp"}}}], + # Addition of default values for mounts / runtime_constraints / + # scheduling_parameters, as happens in a round-trip through + # controller, does not have any real effect and should be + # accepted/ignored rather than causing an error when the CR state + # dictates those attributes are not allowed to change. 
+ ['Committed', true, {priority: 0, mounts: {"/out" => {"capacity" => 1000000, "kind" => "tmp", "exclude_from_output": false}}}], + ['Committed', true, {priority: 0, mounts: {"/out" => {"capacity" => 1000000, "kind" => "tmp", "repository_name": ""}}}], + ['Committed', true, {priority: 0, mounts: {"/out" => {"capacity" => 1000000, "kind" => "tmp", "content": nil}}}], + ['Committed', false, {priority: 0, mounts: {"/out" => {"capacity" => 1000000, "kind" => "tmp", "content": {}}}}], + ['Committed', false, {priority: 0, mounts: {"/out" => {"capacity" => 1000000, "kind" => "tmp", "repository_name": "foo"}}}], + ['Committed', false, {priority: 0, mounts: {"/out" => {"kind" => "tmp", "capacity" => 1234567}}}], + ['Committed', false, {priority: 0, mounts: {}}], + ['Committed', true, {priority: 0, runtime_constraints: {"vcpus" => 1, "ram" => 2}}], + ['Committed', true, {priority: 0, runtime_constraints: {"vcpus" => 1, "ram" => 2, "keep_cache_ram" => 0}}], + ['Committed', true, {priority: 0, runtime_constraints: {"vcpus" => 1, "ram" => 2, "API" => false}}], + ['Committed', false, {priority: 0, runtime_constraints: {"vcpus" => 1, "ram" => 2, "keep_cache_ram" => 1}}], + ['Committed', false, {priority: 0, runtime_constraints: {"vcpus" => 1, "ram" => 2, "API" => true}}], + ['Committed', true, {priority: 0, scheduling_parameters: {"preemptible" => false}}], + ['Committed', true, {priority: 0, scheduling_parameters: {"partitions" => []}}], + ['Committed', true, {priority: 0, scheduling_parameters: {"max_run_time" => 0}}], + ['Committed', false, {priority: 0, scheduling_parameters: {"preemptible" => true}}], + ['Committed', false, {priority: 0, scheduling_parameters: {"partitions" => ["foo"]}}], + ['Committed', false, {priority: 0, scheduling_parameters: {"max_run_time" => 1}}], ['Final', false, {state: ContainerRequest::Committed, name: "foobar"}], ['Final', false, {name: "foobar", priority: 123}], ['Final', false, {name: "foobar", output_uuid: "zzzzz-4zz18-znfnqtbbv4spc3w"}], 
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index 3a0316cf9e..1696f856a5 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -61,24 +61,16 @@ from builtins import next from builtins import str from builtins import object import os -import sys import llfuse import errno import stat import threading import arvados -import pprint import arvados.events -import re -import apiclient -import json import logging import time -import _strptime -import calendar import threading import itertools -import ciso8601 import collections import functools import arvados.keep diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index 7bef8a269f..67a2aaa4da 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -95,6 +95,7 @@ class ArgumentParser(argparse.ArgumentParser): self.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False) self.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False) + self.add_argument('--storage-classes', type=str, metavar='CLASSES', help="Specify comma separated list of storage classes to be used when saving data of new collections", default=None) self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0) @@ -246,6 +247,11 @@ class Mount(object): dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries] mount_readme = False + storage_classes = None + if self.args.storage_classes is not None: + storage_classes = self.args.storage_classes.replace(' ', '').split(',') + self.logger.info("Storage classes requested for new collections: {}".format(', '.join(storage_classes))) + if self.args.collection is not None: # Set up the request handler with the collection 
at the root # First check that the collection is readable @@ -295,7 +301,10 @@ class Mount(object): mount_readme = True if dir_class is not None: - ent = dir_class(*dir_args) + if dir_class in [TagsDirectory, CollectionDirectory]: + ent = dir_class(*dir_args) + else: + ent = dir_class(*dir_args, storage_classes=storage_classes) self.operations.inodes.add_entry(ent) self.listen_for_events = ent.want_event_subscribe() return @@ -305,17 +314,17 @@ class Mount(object): dir_args[0] = e.inode for name in self.args.mount_by_id: - self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False)) + self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False, storage_classes=storage_classes)) for name in self.args.mount_by_pdh: self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=True)) for name in self.args.mount_by_tag: self._add_mount(e, name, TagsDirectory(*dir_args)) for name in self.args.mount_home: - self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True)) + self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True, storage_classes=storage_classes)) for name in self.args.mount_shared: - self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True)) + self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True, storage_classes=storage_classes)) for name in self.args.mount_tmp: - self._add_mount(e, name, TmpCollectionDirectory(*dir_args)) + self._add_mount(e, name, TmpCollectionDirectory(*dir_args, storage_classes=storage_classes)) if mount_readme: text = self._readme_text( diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index a2e3ac139e..78cbd0d8cf 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -487,6 +487,8 @@ class CollectionDirectory(CollectionDirectoryBase): new_collection_record["portable_data_hash"] = new_collection_record["uuid"] if 'manifest_text' not in 
new_collection_record: new_collection_record['manifest_text'] = coll_reader.manifest_text() + if 'storage_classes_desired' not in new_collection_record: + new_collection_record['storage_classes_desired'] = coll_reader.storage_classes_desired() if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"): self.new_collection(new_collection_record, coll_reader) @@ -571,11 +573,12 @@ class TmpCollectionDirectory(CollectionDirectoryBase): def save_new(self): pass - def __init__(self, parent_inode, inodes, api_client, num_retries): + def __init__(self, parent_inode, inodes, api_client, num_retries, storage_classes=None): collection = self.UnsaveableCollection( api_client=api_client, keep_client=api_client.keep, - num_retries=num_retries) + num_retries=num_retries, + storage_classes_desired=storage_classes) super(TmpCollectionDirectory, self).__init__( parent_inode, inodes, api_client.config, collection) self.collection_record_file = None @@ -595,6 +598,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase): "uuid": None, "manifest_text": self.collection.manifest_text(), "portable_data_hash": self.collection.portable_data_hash(), + "storage_classes_desired": self.collection.storage_classes_desired(), } def __contains__(self, k): @@ -653,11 +657,12 @@ and the directory will appear if it exists. """.lstrip() - def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False): + def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False, storage_classes=None): super(MagicDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries self.pdh_only = pdh_only + self.storage_classes = storage_classes def __setattr__(self, name, value): super(MagicDirectory, self).__setattr__(name, value) @@ -687,7 +692,8 @@ and the directory will appear if it exists. 
if project[u'items_available'] == 0: return False e = self.inodes.add_entry(ProjectDirectory( - self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0])) + self.inode, self.inodes, self.api, self.num_retries, + project[u'items'][0], storage_classes=self.storage_classes)) else: e = self.inodes.add_entry(CollectionDirectory( self.inode, self.inodes, self.api, self.num_retries, k)) @@ -811,7 +817,7 @@ class ProjectDirectory(Directory): """A special directory that contains the contents of a project.""" def __init__(self, parent_inode, inodes, api, num_retries, project_object, - poll=True, poll_time=3): + poll=True, poll_time=3, storage_classes=None): super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries @@ -823,6 +829,7 @@ class ProjectDirectory(Directory): self._updating_lock = threading.Lock() self._current_user = None self._full_listing = False + self.storage_classes = storage_classes def want_event_subscribe(self): return True @@ -831,7 +838,7 @@ class ProjectDirectory(Directory): if collection_uuid_pattern.match(i['uuid']): return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i) elif group_uuid_pattern.match(i['uuid']): - return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time) + return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time, self.storage_classes) elif link_uuid_pattern.match(i['uuid']): if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']): return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid']) @@ -982,9 +989,16 @@ class ProjectDirectory(Directory): def mkdir(self, name): try: with llfuse.lock_released: - self.api.collections().create(body={"owner_uuid": self.project_uuid, - "name": name, - "manifest_text": ""}).execute(num_retries=self.num_retries) + c = { + 
"owner_uuid": self.project_uuid, + "name": name, + "manifest_text": "" } + if self.storage_classes is not None: + c["storage_classes_desired"] = self.storage_classes + try: + self.api.collections().create(body=c).execute(num_retries=self.num_retries) + except Exception as e: + raise self.invalidate() except apiclient_errors.Error as error: _logger.error(error) @@ -1079,7 +1093,7 @@ class SharedDirectory(Directory): """A special directory that represents users or groups who have shared projects with me.""" def __init__(self, parent_inode, inodes, api, num_retries, exclude, - poll=False, poll_time=60): + poll=False, poll_time=60, storage_classes=None): super(SharedDirectory, self).__init__(parent_inode, inodes, api.config) self.api = api self.num_retries = num_retries @@ -1087,6 +1101,7 @@ class SharedDirectory(Directory): self._poll = True self._poll_time = poll_time self._updating_lock = threading.Lock() + self.storage_classes = storage_classes @use_counter def update(self): @@ -1156,8 +1171,6 @@ class SharedDirectory(Directory): obr = objects[r] if obr.get("name"): contents[obr["name"]] = obr - #elif obr.get("username"): - # contents[obr["username"]] = obr elif "first_name" in obr: contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr @@ -1172,7 +1185,7 @@ class SharedDirectory(Directory): self.merge(viewitems(contents), lambda i: i[0], lambda a, i: a.uuid() == i[1]['uuid'], - lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time)) + lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes)) except Exception: _logger.exception("arv-mount shared dir error") finally: diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py index 54316bb9a9..82e5c441eb 100644 --- a/services/fuse/tests/test_mount.py +++ b/services/fuse/tests/test_mount.py @@ 
-22,6 +22,7 @@ from . import run_test_server from .integration_test import IntegrationTest from .mount_test_base import MountTestBase +from .test_tmp_collection import storage_classes_desired logger = logging.getLogger('arvados.arv-mount') @@ -1262,3 +1263,31 @@ class SlashSubstitutionTest(IntegrationTest): def _test_slash_substitution_conflict(self, tmpdir, fusename): with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f: f.write('foo') + +class StorageClassesTest(IntegrationTest): + mnt_args = [ + '--read-write', + '--mount-home', 'homedir', + ] + + def setUp(self): + super(StorageClassesTest, self).setUp() + self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings()) + + @IntegrationTest.mount(argv=mnt_args) + def test_collection_default_storage_classes(self): + coll_path = os.path.join(self.mnt, 'homedir', 'a_collection') + self.api.collections().create(body={'name':'a_collection'}).execute() + self.pool_test(coll_path) + @staticmethod + def _test_collection_default_storage_classes(self, coll): + self.assertEqual(storage_classes_desired(coll), ['default']) + + @IntegrationTest.mount(argv=mnt_args+['--storage-classes', 'foo']) + def test_collection_custom_storage_classes(self): + coll_path = os.path.join(self.mnt, 'homedir', 'new_coll') + os.mkdir(coll_path) + self.pool_test(coll_path) + @staticmethod + def _test_collection_custom_storage_classes(self, coll): + self.assertEqual(storage_classes_desired(coll), ['foo']) diff --git a/services/fuse/tests/test_tmp_collection.py b/services/fuse/tests/test_tmp_collection.py index 50075c96ae..c59024267a 100644 --- a/services/fuse/tests/test_tmp_collection.py +++ b/services/fuse/tests/test_tmp_collection.py @@ -58,6 +58,9 @@ def current_manifest(tmpdir): with open(os.path.join(tmpdir, '.arvados#collection')) as tmp: return json.load(tmp)['manifest_text'] +def storage_classes_desired(tmpdir): + with open(os.path.join(tmpdir, '.arvados#collection')) as tmp: + return 
json.load(tmp)['storage_classes_desired'] class TmpCollectionTest(IntegrationTest): mnt_args = [ @@ -65,6 +68,13 @@ class TmpCollectionTest(IntegrationTest): '--mount-tmp', 'zzz', ] + @IntegrationTest.mount(argv=mnt_args+['--storage-classes', 'foo, bar']) + def test_storage_classes(self): + self.pool_test(os.path.join(self.mnt, 'zzz')) + @staticmethod + def _test_storage_classes(self, zzz): + self.assertEqual(storage_classes_desired(zzz), ['foo', 'bar']) + @IntegrationTest.mount(argv=mnt_args+['--mount-tmp', 'yyy']) def test_two_tmp(self): self.pool_test(os.path.join(self.mnt, 'zzz'), diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go index 9bdecdca1c..a52af80484 100644 --- a/services/keep-web/cache.go +++ b/services/keep-web/cache.go @@ -131,8 +131,12 @@ type cachedPermission struct { } type cachedSession struct { - expire time.Time - fs atomic.Value + expire time.Time + fs atomic.Value + client *arvados.Client + arvadosclient *arvadosclient.ArvadosClient + keepclient *keepclient.KeepClient + user atomic.Value } func (c *cache) setup() { @@ -213,7 +217,7 @@ func (c *cache) ResetSession(token string) { // Get a long-lived CustomFileSystem suitable for doing a read operation // with the given token. 
-func (c *cache) GetSession(token string) (arvados.CustomFileSystem, error) { +func (c *cache) GetSession(token string) (arvados.CustomFileSystem, *cachedSession, error) { c.setupOnce.Do(c.setup) now := time.Now() ent, _ := c.sessions.Get(token) @@ -224,6 +228,17 @@ func (c *cache) GetSession(token string) (arvados.CustomFileSystem, error) { sess = &cachedSession{ expire: now.Add(c.config.TTL.Duration()), } + var err error + sess.client, err = arvados.NewClientFromConfig(c.cluster) + if err != nil { + return nil, nil, err + } + sess.client.AuthToken = token + sess.arvadosclient, err = arvadosclient.New(sess.client) + if err != nil { + return nil, nil, err + } + sess.keepclient = keepclient.New(sess.arvadosclient) c.sessions.Add(token, sess) } else if sess.expire.Before(now) { c.metrics.sessionMisses.Inc() @@ -234,22 +249,12 @@ func (c *cache) GetSession(token string) (arvados.CustomFileSystem, error) { go c.pruneSessions() fs, _ := sess.fs.Load().(arvados.CustomFileSystem) if fs != nil && !expired { - return fs, nil + return fs, sess, nil } - ac, err := arvados.NewClientFromConfig(c.cluster) - if err != nil { - return nil, err - } - ac.AuthToken = token - arv, err := arvadosclient.New(ac) - if err != nil { - return nil, err - } - kc := keepclient.New(arv) - fs = ac.SiteFileSystem(kc) + fs = sess.client.SiteFileSystem(sess.keepclient) fs.ForwardSlashNameSubstitution(c.cluster.Collections.ForwardSlashNameSubstitution) sess.fs.Store(fs) - return fs, nil + return fs, sess, nil } // Remove all expired session cache entries, then remove more entries @@ -464,3 +469,35 @@ func (c *cache) lookupCollection(key string) *arvados.Collection { c.metrics.collectionHits.Inc() return ent.collection } + +func (c *cache) GetTokenUser(token string) (*arvados.User, error) { + // Get and cache user record associated with this + // token. We need to know their UUID for logging, and + // whether they are an admin or not for certain + // permission checks. 
+ + // Get/create session entry + _, sess, err := c.GetSession(token) + if err != nil { + return nil, err + } + + // See if the user is already set, and if so, return it + user, _ := sess.user.Load().(*arvados.User) + if user != nil { + return user, nil + } + + // Fetch the user record + c.metrics.apiCalls.Inc() + var current arvados.User + + err = sess.client.RequestAndDecode(&current, "GET", "/arvados/v1/users/current", nil, nil) + if err != nil { + return nil, err + } + + // Stash the user record for next time + sess.user.Store(&current) + return &current, nil +} diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go index 81925421dc..97ec95e3aa 100644 --- a/services/keep-web/handler.go +++ b/services/keep-web/handler.go @@ -398,6 +398,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { defer h.clientPool.Put(arv) var collection *arvados.Collection + var tokenUser *arvados.User tokenResult := make(map[string]int) for _, arv.ApiToken = range tokens { var err error @@ -483,7 +484,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { return } + // Check configured permission + _, sess, err := h.Config.Cache.GetSession(arv.ApiToken) + tokenUser, err = h.Config.Cache.GetTokenUser(arv.ApiToken) + if webdavMethod[r.Method] { + if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) { + http.Error(w, "Not permitted", http.StatusForbidden) + return + } + h.logUploadOrDownload(r, sess.arvadosclient, nil, strings.Join(targetPath, "/"), collection, tokenUser) + if writeMethod[r.Method] { // Save the collection only if/when all // webdav->filesystem operations succeed -- @@ -538,6 +549,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { } else if stat.IsDir() { h.serveDirectory(w, r, collection.Name, fs, openPath, true) } else { + if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) { + http.Error(w, "Not permitted", http.StatusForbidden) + return + } + h.logUploadOrDownload(r, 
sess.arvadosclient, nil, strings.Join(targetPath, "/"), collection, tokenUser) + http.ServeContent(w, r, basename, stat.ModTime(), f) if wrote := int64(w.WroteBodyBytes()); wrote != stat.Size() && w.WroteStatus() == http.StatusOK { // If we wrote fewer bytes than expected, it's @@ -583,7 +600,8 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed) return } - fs, err := h.Config.Cache.GetSession(tokens[0]) + + fs, sess, err := h.Config.Cache.GetSession(tokens[0]) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return @@ -606,6 +624,14 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s } return } + + tokenUser, err := h.Config.Cache.GetTokenUser(tokens[0]) + if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) { + http.Error(w, "Not permitted", http.StatusForbidden) + return + } + h.logUploadOrDownload(r, sess.arvadosclient, fs, r.URL.Path, nil, tokenUser) + if r.Method == "GET" { _, basename := filepath.Split(r.URL.Path) applyContentDispositionHdr(w, r, basename, attachment) @@ -836,3 +862,117 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc io.WriteString(w, html.EscapeString(redir)) io.WriteString(w, `">Continue`) } + +func (h *handler) userPermittedToUploadOrDownload(method string, tokenUser *arvados.User) bool { + var permitDownload bool + var permitUpload bool + if tokenUser != nil && tokenUser.IsAdmin { + permitUpload = h.Config.cluster.Collections.WebDAVPermission.Admin.Upload + permitDownload = h.Config.cluster.Collections.WebDAVPermission.Admin.Download + } else { + permitUpload = h.Config.cluster.Collections.WebDAVPermission.User.Upload + permitDownload = h.Config.cluster.Collections.WebDAVPermission.User.Download + } + if (method == "PUT" || method == "POST") && !permitUpload { + // Disallow operations that upload new files. 
+ // Permit webdav operations that move existing files around. + return false + } else if method == "GET" && !permitDownload { + // Disallow downloading file contents. + // Permit webdav operations like PROPFIND that retrieve metadata + // but not file contents. + return false + } + return true +} + +func (h *handler) logUploadOrDownload( + r *http.Request, + client *arvadosclient.ArvadosClient, + fs arvados.CustomFileSystem, + filepath string, + collection *arvados.Collection, + user *arvados.User) { + + log := ctxlog.FromContext(r.Context()) + props := make(map[string]string) + props["reqPath"] = r.URL.Path + var useruuid string + if user != nil { + log = log.WithField("user_uuid", user.UUID). + WithField("user_full_name", user.FullName) + useruuid = user.UUID + } else { + useruuid = fmt.Sprintf("%s-tpzed-anonymouspublic", h.Config.cluster.ClusterID) + } + if collection == nil && fs != nil { + collection, filepath = h.determineCollection(fs, filepath) + } + if collection != nil { + log = log.WithField("collection_uuid", collection.UUID). 
+ WithField("collection_file_path", filepath) + props["collection_uuid"] = collection.UUID + props["collection_file_path"] = filepath + } + if r.Method == "PUT" || r.Method == "POST" { + log.Info("File upload") + if h.Config.cluster.Collections.WebDAVLogEvents { + go func() { + lr := arvadosclient.Dict{"log": arvadosclient.Dict{ + "object_uuid": useruuid, + "event_type": "file_upload", + "properties": props}} + err := client.Create("logs", lr, nil) + if err != nil { + log.WithError(err).Error("Failed to create upload log event on API server") + } + }() + } + } else if r.Method == "GET" { + if collection != nil && collection.PortableDataHash != "" { + log = log.WithField("portable_data_hash", collection.PortableDataHash) + props["portable_data_hash"] = collection.PortableDataHash + } + log.Info("File download") + if h.Config.cluster.Collections.WebDAVLogEvents { + go func() { + lr := arvadosclient.Dict{"log": arvadosclient.Dict{ + "object_uuid": useruuid, + "event_type": "file_download", + "properties": props}} + err := client.Create("logs", lr, nil) + if err != nil { + log.WithError(err).Error("Failed to create download log event on API server") + } + }() + } + } +} + +func (h *handler) determineCollection(fs arvados.CustomFileSystem, path string) (*arvados.Collection, string) { + segments := strings.Split(path, "/") + var i int + for i = 0; i < len(segments); i++ { + dir := append([]string{}, segments[0:i]...) + dir = append(dir, ".arvados#collection") + f, err := fs.OpenFile(strings.Join(dir, "/"), os.O_RDONLY, 0) + if f != nil { + defer f.Close() + } + if err != nil { + if !os.IsNotExist(err) { + return nil, "" + } + continue + } + // err is nil so we found it. 
+ decoder := json.NewDecoder(f) + var collection arvados.Collection + err = decoder.Decode(&collection) + if err != nil { + return nil, "" + } + return &collection, strings.Join(segments[i:], "/") + } + return nil, "" +} diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go index 446d591bfd..e883e806cc 100644 --- a/services/keep-web/handler_test.go +++ b/services/keep-web/handler_test.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "html" + "io" "io/ioutil" "net/http" "net/http/httptest" @@ -32,6 +33,10 @@ import ( var _ = check.Suite(&UnitSuite{}) +func init() { + arvados.DebugLocksPanicMode = true +} + type UnitSuite struct { Config *arvados.Config } @@ -88,8 +93,9 @@ func (s *UnitSuite) TestEmptyResponse(c *check.C) { // If we return no content because the client sent an // If-Modified-Since header, our response should be - // 304, and we should not emit a log message. - {true, true, http.StatusNotModified, ``}, + // 304. We still expect a "File download" log since it + // counts as a file access for auditing. 
+ {true, true, http.StatusNotModified, `(?ms).*msg="File download".*`}, } { c.Logf("trial: %+v", trial) arvadostest.StartKeep(2, true) @@ -1181,3 +1187,187 @@ func copyHeader(h http.Header) http.Header { } return hc } + +func (s *IntegrationSuite) checkUploadDownloadRequest(c *check.C, h *handler, req *http.Request, + successCode int, direction string, perm bool, userUuid string, collectionUuid string, filepath string) { + + client := s.testServer.Config.Client + client.AuthToken = arvadostest.AdminToken + var logentries arvados.LogList + limit1 := 1 + err := client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil, + arvados.ResourceListParams{ + Limit: &limit1, + Order: "created_at desc"}) + c.Check(err, check.IsNil) + c.Check(logentries.Items, check.HasLen, 1) + lastLogId := logentries.Items[0].ID + nextLogId := lastLogId + + var logbuf bytes.Buffer + logger := logrus.New() + logger.Out = &logbuf + resp := httptest.NewRecorder() + req = req.WithContext(ctxlog.Context(context.Background(), logger)) + h.ServeHTTP(resp, req) + + if perm { + c.Check(resp.Result().StatusCode, check.Equals, successCode) + c.Check(logbuf.String(), check.Matches, `(?ms).*msg="File `+direction+`".*`) + c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*level=error.*`) + + count := 0 + for ; nextLogId == lastLogId && count < 20; count++ { + time.Sleep(50 * time.Millisecond) + err = client.RequestAndDecode(&logentries, "GET", "arvados/v1/logs", nil, + arvados.ResourceListParams{ + Filters: []arvados.Filter{arvados.Filter{Attr: "event_type", Operator: "=", Operand: "file_" + direction}}, + Limit: &limit1, + Order: "created_at desc", + }) + c.Check(err, check.IsNil) + if len(logentries.Items) > 0 { + nextLogId = logentries.Items[0].ID + } + } + c.Check(count, check.Not(check.Equals), 20) + c.Check(logentries.Items[0].ObjectUUID, check.Equals, userUuid) + c.Check(logentries.Items[0].Properties["collection_uuid"], check.Equals, collectionUuid) + 
c.Check(logentries.Items[0].Properties["collection_file_path"], check.Equals, filepath) + } else { + c.Check(resp.Result().StatusCode, check.Equals, http.StatusForbidden) + c.Check(logbuf.String(), check.Equals, "") + } +} + +func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) { + config := newConfig(s.ArvConfig) + h := handler{Config: config} + u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo") + + config.cluster.Collections.TrustAllContent = true + + for _, adminperm := range []bool{true, false} { + for _, userperm := range []bool{true, false} { + config.cluster.Collections.WebDAVPermission.Admin.Download = adminperm + config.cluster.Collections.WebDAVPermission.User.Download = userperm + + // Test admin permission + req := &http.Request{ + Method: "GET", + Host: u.Host, + URL: u, + RequestURI: u.RequestURI(), + Header: http.Header{ + "Authorization": {"Bearer " + arvadostest.AdminToken}, + }, + } + s.checkUploadDownloadRequest(c, &h, req, http.StatusOK, "download", adminperm, + arvadostest.AdminUserUUID, arvadostest.FooCollection, "foo") + + // Test user permission + req = &http.Request{ + Method: "GET", + Host: u.Host, + URL: u, + RequestURI: u.RequestURI(), + Header: http.Header{ + "Authorization": {"Bearer " + arvadostest.ActiveToken}, + }, + } + s.checkUploadDownloadRequest(c, &h, req, http.StatusOK, "download", userperm, + arvadostest.ActiveUserUUID, arvadostest.FooCollection, "foo") + } + } + + config.cluster.Collections.WebDAVPermission.User.Download = true + + for _, tryurl := range []string{"http://" + arvadostest.MultilevelCollection1 + ".keep-web.example/dir1/subdir/file1", + "http://keep-web/users/active/multilevel_collection_1/dir1/subdir/file1"} { + + u = mustParseURL(tryurl) + req := &http.Request{ + Method: "GET", + Host: u.Host, + URL: u, + RequestURI: u.RequestURI(), + Header: http.Header{ + "Authorization": {"Bearer " + arvadostest.ActiveToken}, + }, + } + s.checkUploadDownloadRequest(c, &h, 
req, http.StatusOK, "download", true, + arvadostest.ActiveUserUUID, arvadostest.MultilevelCollection1, "dir1/subdir/file1") + } + + u = mustParseURL("http://" + strings.Replace(arvadostest.FooCollectionPDH, "+", "-", 1) + ".keep-web.example/foo") + req := &http.Request{ + Method: "GET", + Host: u.Host, + URL: u, + RequestURI: u.RequestURI(), + Header: http.Header{ + "Authorization": {"Bearer " + arvadostest.ActiveToken}, + }, + } + s.checkUploadDownloadRequest(c, &h, req, http.StatusOK, "download", true, + arvadostest.ActiveUserUUID, arvadostest.FooCollection, "foo") +} + +func (s *IntegrationSuite) TestUploadLoggingPermission(c *check.C) { + config := newConfig(s.ArvConfig) + h := handler{Config: config} + + for _, adminperm := range []bool{true, false} { + for _, userperm := range []bool{true, false} { + + arv := s.testServer.Config.Client + arv.AuthToken = arvadostest.ActiveToken + + var coll arvados.Collection + err := arv.RequestAndDecode(&coll, + "POST", + "/arvados/v1/collections", + nil, + map[string]interface{}{ + "ensure_unique_name": true, + "collection": map[string]interface{}{ + "name": "test collection", + }, + }) + c.Assert(err, check.Equals, nil) + + u := mustParseURL("http://" + coll.UUID + ".keep-web.example/bar") + + config.cluster.Collections.WebDAVPermission.Admin.Upload = adminperm + config.cluster.Collections.WebDAVPermission.User.Upload = userperm + + // Test admin permission + req := &http.Request{ + Method: "PUT", + Host: u.Host, + URL: u, + RequestURI: u.RequestURI(), + Header: http.Header{ + "Authorization": {"Bearer " + arvadostest.AdminToken}, + }, + Body: io.NopCloser(bytes.NewReader([]byte("bar"))), + } + s.checkUploadDownloadRequest(c, &h, req, http.StatusCreated, "upload", adminperm, + arvadostest.AdminUserUUID, coll.UUID, "bar") + + // Test user permission + req = &http.Request{ + Method: "PUT", + Host: u.Host, + URL: u, + RequestURI: u.RequestURI(), + Header: http.Header{ + "Authorization": {"Bearer " + arvadostest.ActiveToken}, 
+ }, + Body: io.NopCloser(bytes.NewReader([]byte("bar"))), + } + s.checkUploadDownloadRequest(c, &h, req, http.StatusCreated, "upload", userperm, + arvadostest.ActiveUserUUID, coll.UUID, "bar") + } + } +} diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go index f03ff01b81..e6262374d6 100644 --- a/services/keep-web/s3.go +++ b/services/keep-web/s3.go @@ -24,7 +24,9 @@ import ( "time" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/ctxlog" + "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/AdRoll/goamz/s3" ) @@ -136,15 +138,39 @@ func s3stringToSign(alg, scope, signedHeaders string, r *http.Request) (string, } } - normalizedURL := *r.URL - normalizedURL.RawPath = "" - normalizedURL.Path = reMultipleSlashChars.ReplaceAllString(normalizedURL.Path, "/") - ctxlog.FromContext(r.Context()).Infof("escapedPath %s", normalizedURL.EscapedPath()) - canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedURL.EscapedPath(), s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256")) + normalizedPath := normalizePath(r.URL.Path) + ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath) + canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256")) ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest) return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil } +func normalizePath(s string) string { + // (url.URL).EscapedPath() would be incorrect here. AWS + // documentation specifies the URL path should be normalized + // according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-" + // / "." / "_" / "~". 
The implication is that everything other + // than those chars (and "/") _must_ be percent-encoded -- + // even chars like ";" and "," that are not normally + // percent-encoded in paths. + out := "" + for _, c := range []byte(reMultipleSlashChars.ReplaceAllString(s, "/")) { + if (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '-' || + c == '.' || + c == '_' || + c == '~' || + c == '/' { + out += string(c) + } else { + out += fmt.Sprintf("%%%02X", c) + } + } + return out +} + func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) { // scope is {datestamp}/{region}/{service}/aws4_request drs := strings.Split(scope, "/") @@ -285,19 +311,25 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { var err error var fs arvados.CustomFileSystem + var arvclient *arvadosclient.ArvadosClient if r.Method == http.MethodGet || r.Method == http.MethodHead { // Use a single session (cached FileSystem) across // multiple read requests. - fs, err = h.Config.Cache.GetSession(token) + var sess *cachedSession + fs, sess, err = h.Config.Cache.GetSession(token) if err != nil { s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError) return true } + arvclient = sess.arvadosclient } else { // Create a FileSystem for this request, to avoid // exposing incomplete write operations to concurrent // requests. 
- _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), token) + var kc *keepclient.KeepClient + var release func() + var client *arvados.Client + arvclient, kc, client, release, err = h.getClients(r.Header.Get("X-Request-Id"), token) if err != nil { s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusInternalServerError) return true @@ -372,6 +404,14 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { s3ErrorResponse(w, NoSuchKey, "The specified key does not exist.", r.URL.Path, http.StatusNotFound) return true } + + tokenUser, err := h.Config.Cache.GetTokenUser(token) + if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) { + http.Error(w, "Not permitted", http.StatusForbidden) + return true + } + h.logUploadOrDownload(r, arvclient, fs, fspath, nil, tokenUser) + // shallow copy r, and change URL path r := *r r.URL.Path = fspath @@ -455,6 +495,14 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { return true } defer f.Close() + + tokenUser, err := h.Config.Cache.GetTokenUser(token) + if !h.userPermittedToUploadOrDownload(r.Method, tokenUser) { + http.Error(w, "Not permitted", http.StatusForbidden) + return true + } + h.logUploadOrDownload(r, arvclient, fs, fspath, nil, tokenUser) + _, err = io.Copy(f, r.Body) if err != nil { err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err) diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go index 4f70168b56..f411fde871 100644 --- a/services/keep-web/s3_test.go +++ b/services/keep-web/s3_test.go @@ -558,12 +558,15 @@ func (s *IntegrationSuite) TestS3NormalizeURIForSignature(c *check.C) { rawPath string normalizedPath string }{ - {"/foo", "/foo"}, // boring case - {"/foo%5fbar", "/foo_bar"}, // _ must not be escaped - {"/foo%2fbar", "/foo/bar"}, // / must not be escaped - {"/(foo)", "/%28foo%29"}, // () must be escaped - {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase + {"/foo", "/foo"}, // boring case 
+ {"/foo%5fbar", "/foo_bar"}, // _ must not be escaped + {"/foo%2fbar", "/foo/bar"}, // / must not be escaped + {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped + {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase + {"//foo///.bar", "/foo/.bar"}, // "//" and "///" must be squashed to "/" } { + c.Logf("trial %q", trial) + date := time.Now().UTC().Format("20060102T150405Z") scope := "20200202/zzzzz/S3/aws4_request" canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "") @@ -1098,6 +1101,15 @@ func (s *IntegrationSuite) TestS3cmd(c *check.C) { buf, err := cmd.CombinedOutput() c.Check(err, check.IsNil) c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`) + + // This tests whether s3cmd's path normalization agrees with + // keep-web's signature verification wrt chars like "|" + // (neither reserved nor unreserved) and "," (not normally + // percent-encoded in a path). 
+ cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar") + buf, err = cmd.CombinedOutput() + c.Check(err, check.NotNil) + c.Check(string(buf), check.Matches, `(?ms).*NoSuchKey.*\n`) } func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) { diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go index 5c68eb4249..a65a48892a 100644 --- a/services/keep-web/server_test.go +++ b/services/keep-web/server_test.go @@ -34,6 +34,7 @@ var _ = check.Suite(&IntegrationSuite{}) // IntegrationSuite tests need an API server and a keep-web server type IntegrationSuite struct { testServer *server + ArvConfig *arvados.Config } func (s *IntegrationSuite) TestNoToken(c *check.C) { @@ -389,7 +390,7 @@ func (s *IntegrationSuite) TestMetrics(c *check.C) { c.Check(summaries["request_duration_seconds/get/404"].SampleCount, check.Equals, "1") c.Check(summaries["time_to_status_seconds/get/404"].SampleCount, check.Equals, "1") c.Check(counters["arvados_keepweb_collectioncache_requests//"].Value, check.Equals, int64(2)) - c.Check(counters["arvados_keepweb_collectioncache_api_calls//"].Value, check.Equals, int64(1)) + c.Check(counters["arvados_keepweb_collectioncache_api_calls//"].Value, check.Equals, int64(2)) c.Check(counters["arvados_keepweb_collectioncache_hits//"].Value, check.Equals, int64(1)) c.Check(counters["arvados_keepweb_collectioncache_pdh_hits//"].Value, check.Equals, int64(1)) c.Check(counters["arvados_keepweb_collectioncache_permission_hits//"].Value, check.Equals, int64(1)) @@ -446,6 +447,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) { cfg.cluster.ManagementToken = arvadostest.ManagementToken cfg.cluster.SystemRootToken = arvadostest.SystemRootToken cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken + s.ArvConfig = arvCfg s.testServer = 
&server{Config: cfg} err = s.testServer.Start(ctxlog.TestLogger(c)) c.Assert(err, check.Equals, nil) diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go index 538a061227..c679e5b91c 100644 --- a/services/keepproxy/keepproxy.go +++ b/services/keepproxy/keepproxy.go @@ -16,7 +16,6 @@ import ( "os/signal" "regexp" "strings" - "sync" "syscall" "time" @@ -29,6 +28,7 @@ import ( "github.com/coreos/go-systemd/daemon" "github.com/ghodss/yaml" "github.com/gorilla/mux" + lru "github.com/hashicorp/golang-lru" log "github.com/sirupsen/logrus" ) @@ -163,45 +163,53 @@ func run(logger log.FieldLogger, cluster *arvados.Cluster) error { signal.Notify(term, syscall.SIGINT) // Start serving requests. - router = MakeRESTRouter(kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster.ManagementToken) + router, err = MakeRESTRouter(kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster, logger) + if err != nil { + return err + } return http.Serve(listener, httpserver.AddRequestIDs(httpserver.LogRequests(router))) } +type TokenCacheEntry struct { + expire int64 + user *arvados.User +} + type APITokenCache struct { - tokens map[string]int64 - lock sync.Mutex + tokens *lru.TwoQueueCache expireTime int64 } -// RememberToken caches the token and set an expire time. If we already have -// an expire time on the token, it is not updated. -func (cache *APITokenCache) RememberToken(token string) { - cache.lock.Lock() - defer cache.lock.Unlock() - +// RememberToken caches the token and set an expire time. If the +// token is already in the cache, it is not updated. 
+func (cache *APITokenCache) RememberToken(token string, user *arvados.User) { now := time.Now().Unix() - if cache.tokens[token] == 0 { - cache.tokens[token] = now + cache.expireTime + _, ok := cache.tokens.Get(token) + if !ok { + cache.tokens.Add(token, TokenCacheEntry{ + expire: now + cache.expireTime, + user: user, + }) } } // RecallToken checks if the cached token is known and still believed to be // valid. -func (cache *APITokenCache) RecallToken(token string) bool { - cache.lock.Lock() - defer cache.lock.Unlock() +func (cache *APITokenCache) RecallToken(token string) (bool, *arvados.User) { + val, ok := cache.tokens.Get(token) + if !ok { + return false, nil + } + cacheEntry := val.(TokenCacheEntry) now := time.Now().Unix() - if cache.tokens[token] == 0 { - // Unknown token - return false - } else if now < cache.tokens[token] { + if now < cacheEntry.expire { // Token is known and still valid - return true + return true, cacheEntry.user } else { // Token is expired - cache.tokens[token] = 0 - return false + cache.tokens.Remove(token) + return false, nil } } @@ -216,10 +224,10 @@ func GetRemoteAddress(req *http.Request) string { return req.RemoteAddr } -func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *APITokenCache, req *http.Request) (pass bool, tok string) { +func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, tok string, user *arvados.User) { parts := strings.SplitN(req.Header.Get("Authorization"), " ", 2) if len(parts) < 2 || !(parts[0] == "OAuth2" || parts[0] == "Bearer") || len(parts[1]) == 0 { - return false, "" + return false, "", nil } tok = parts[1] @@ -234,29 +242,56 @@ func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *APITokenCache, r op = "write" } - if cache.RecallToken(op + ":" + tok) { + if ok, user := h.APITokenCache.RecallToken(op + ":" + tok); ok { // Valid in the cache, short circuit - return true, tok + return true, tok, user } var err error - arv := *kc.Arvados + arv := 
*h.KeepClient.Arvados arv.ApiToken = tok arv.RequestID = req.Header.Get("X-Request-Id") - if op == "read" { - err = arv.Call("HEAD", "keep_services", "", "accessible", nil, nil) - } else { - err = arv.Call("HEAD", "users", "", "current", nil, nil) + user = &arvados.User{} + userCurrentError := arv.Call("GET", "users", "", "current", nil, user) + err = userCurrentError + if err != nil && op == "read" { + apiError, ok := err.(arvadosclient.APIServerError) + if ok && apiError.HttpStatusCode == http.StatusForbidden { + // If it was a scoped "sharing" token it will + // return 403 instead of 401 for the current + // user check. If it is a download operation + // and they have permission to read the + // keep_services table, we can allow it. + err = arv.Call("HEAD", "keep_services", "", "accessible", nil, nil) + } } if err != nil { log.Printf("%s: CheckAuthorizationHeader error: %v", GetRemoteAddress(req), err) - return false, "" + return false, "", nil + } + + if userCurrentError == nil && user.IsAdmin { + // checking userCurrentError is probably redundant, + // IsAdmin would be false anyway. But can't hurt. + if op == "read" && !h.cluster.Collections.KeepproxyPermission.Admin.Download { + return false, "", nil + } + if op == "write" && !h.cluster.Collections.KeepproxyPermission.Admin.Upload { + return false, "", nil + } + } else { + if op == "read" && !h.cluster.Collections.KeepproxyPermission.User.Download { + return false, "", nil + } + if op == "write" && !h.cluster.Collections.KeepproxyPermission.User.Upload { + return false, "", nil + } } // Success! 
Update cache - cache.RememberToken(op + ":" + tok) + h.APITokenCache.RememberToken(op+":"+tok, user) - return true, tok + return true, tok, user } // We need to make a private copy of the default http transport early @@ -273,11 +308,13 @@ type proxyHandler struct { *APITokenCache timeout time.Duration transport *http.Transport + logger log.FieldLogger + cluster *arvados.Cluster } // MakeRESTRouter returns an http.Handler that passes GET and PUT // requests to the appropriate handlers. -func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken string) http.Handler { +func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *arvados.Cluster, logger log.FieldLogger) (http.Handler, error) { rest := mux.NewRouter() transport := defaultTransport @@ -289,15 +326,22 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken transport.TLSClientConfig = arvadosclient.MakeTLSConfig(kc.Arvados.ApiInsecure) transport.TLSHandshakeTimeout = keepclient.DefaultTLSHandshakeTimeout + cacheQ, err := lru.New2Q(500) + if err != nil { + return nil, fmt.Errorf("Error from lru.New2Q: %v", err) + } + h := &proxyHandler{ Handler: rest, KeepClient: kc, timeout: timeout, transport: &transport, APITokenCache: &APITokenCache{ - tokens: make(map[string]int64), + tokens: cacheQ, expireTime: 300, }, + logger: logger, + cluster: cluster, } rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Get).Methods("GET", "HEAD") @@ -316,19 +360,19 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken rest.HandleFunc(`/`, h.Options).Methods("OPTIONS") rest.Handle("/_health/{check}", &health.Handler{ - Token: mgmtToken, + Token: cluster.ManagementToken, Prefix: "/_health/", }).Methods("GET") rest.NotFoundHandler = InvalidPathHandler{} - return h + return h, nil } var errLoopDetected = errors.New("loop detected") -func (*proxyHandler) checkLoop(resp http.ResponseWriter, req *http.Request) error { +func (h *proxyHandler) 
checkLoop(resp http.ResponseWriter, req *http.Request) error { if via := req.Header.Get("Via"); strings.Index(via, " "+viaAlias) >= 0 { - log.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via) + h.logger.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via) http.Error(resp, errLoopDetected.Error(), http.StatusInternalServerError) return errLoopDetected } @@ -354,7 +398,7 @@ func (h *proxyHandler) Options(resp http.ResponseWriter, req *http.Request) { SetCorsHeaders(resp) } -var errBadAuthorizationHeader = errors.New("Missing or invalid Authorization header") +var errBadAuthorizationHeader = errors.New("Missing or invalid Authorization header, or method not allowed") var errContentLengthMismatch = errors.New("Actual length != expected content length") var errMethodNotSupported = errors.New("Method not supported") @@ -384,7 +428,8 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) { var pass bool var tok string - if pass, tok = CheckAuthorizationHeader(kc, h.APITokenCache, req); !pass { + var user *arvados.User + if pass, tok, user = h.CheckAuthorizationHeader(req); !pass { status, err = http.StatusForbidden, errBadAuthorizationHeader return } @@ -398,6 +443,18 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) { locator = removeHint.ReplaceAllString(locator, "$1") + if locator != "" { + parts := strings.SplitN(locator, "+", 3) + if len(parts) >= 2 { + logger := h.logger + if user != nil { + logger = logger.WithField("user_uuid", user.UUID). 
+ WithField("user_full_name", user.FullName) + } + logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block download") + } + } + switch req.Method { case "HEAD": expectLength, proxiedURI, err = kc.Ask(locator) @@ -498,7 +555,8 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) { var pass bool var tok string - if pass, tok = CheckAuthorizationHeader(kc, h.APITokenCache, req); !pass { + var user *arvados.User + if pass, tok, user = h.CheckAuthorizationHeader(req); !pass { err = errBadAuthorizationHeader status = http.StatusForbidden return @@ -510,9 +568,9 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) { kc.Arvados = &arvclient // Check if the client specified the number of replicas - if req.Header.Get("X-Keep-Desired-Replicas") != "" { + if desiredReplicas := req.Header.Get(keepclient.XKeepDesiredReplicas); desiredReplicas != "" { var r int - _, err := fmt.Sscanf(req.Header.Get(keepclient.XKeepDesiredReplicas), "%d", &r) + _, err := fmt.Sscanf(desiredReplicas, "%d", &r) if err == nil { kc.Want_replicas = r } @@ -531,29 +589,46 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) { locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength) } + if locatorOut != "" { + parts := strings.SplitN(locatorOut, "+", 3) + if len(parts) >= 2 { + logger := h.logger + if user != nil { + logger = logger.WithField("user_uuid", user.UUID). 
+ WithField("user_full_name", user.FullName) + } + logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block upload") + } + } + // Tell the client how many successful PUTs we accomplished resp.Header().Set(keepclient.XKeepReplicasStored, fmt.Sprintf("%d", wroteReplicas)) switch err.(type) { case nil: status = http.StatusOK + if len(kc.StorageClasses) > 0 { + // A successful PUT request with storage classes means that all + // storage classes were fulfilled, so the client will get a + // confirmation via the X-Storage-Classes-Confirmed header. + hdr := "" + isFirst := true + for _, sc := range kc.StorageClasses { + if isFirst { + hdr = fmt.Sprintf("%s=%d", sc, wroteReplicas) + isFirst = false + } else { + hdr += fmt.Sprintf(", %s=%d", sc, wroteReplicas) + } + } + resp.Header().Set(keepclient.XKeepStorageClassesConfirmed, hdr) + } _, err = io.WriteString(resp, locatorOut) - case keepclient.OversizeBlockError: // Too much data status = http.StatusRequestEntityTooLarge - case keepclient.InsufficientReplicasError: - if wroteReplicas > 0 { - // At least one write is considered success. The - // client can decide if getting less than the number of - // replications it asked for is a fatal error. 
- status = http.StatusOK - _, err = io.WriteString(resp, locatorOut) - } else { - status = http.StatusServiceUnavailable - } - + status = http.StatusServiceUnavailable default: status = http.StatusBadGateway } @@ -580,7 +655,7 @@ func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) { }() kc := h.makeKeepClient(req) - ok, token := CheckAuthorizationHeader(kc, h.APITokenCache, req) + ok, token, _ := h.CheckAuthorizationHeader(req) if !ok { status, err = http.StatusForbidden, errBadAuthorizationHeader return diff --git a/services/keepproxy/keepproxy_test.go b/services/keepproxy/keepproxy_test.go index 6a02ab9bd3..2d4266d8d5 100644 --- a/services/keepproxy/keepproxy_test.go +++ b/services/keepproxy/keepproxy_test.go @@ -26,6 +26,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/keepclient" log "github.com/sirupsen/logrus" + "gopkg.in/check.v1" . "gopkg.in/check.v1" ) @@ -120,7 +121,7 @@ func (s *NoKeepServerSuite) TearDownSuite(c *C) { arvadostest.StopAPI() } -func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool) *keepclient.KeepClient { +func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool, kp *arvados.UploadDownloadRolePermissions) (*keepclient.KeepClient, *bytes.Buffer) { cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load() c.Assert(err, Equals, nil) cluster, err := cfg.GetCluster("") @@ -133,9 +134,16 @@ func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool) *keepc cluster.Services.Keepproxy.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: ":0"}: {}} + if kp != nil { + cluster.Collections.KeepproxyPermission = *kp + } + listener = nil + logbuf := &bytes.Buffer{} + logger := log.New() + logger.Out = logbuf go func() { - run(log.New(), cluster) + run(logger, cluster) defer closeListener() }() waitForListener() @@ -153,11 +161,11 @@ func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool) *keepc kc.SetServiceRoots(sr, sr, sr) kc.Arvados.External = 
true - return kc + return kc, logbuf } func (s *ServerRequiredSuite) TestResponseViaHeader(c *C) { - runProxy(c, false, false) + runProxy(c, false, false, nil) defer closeListener() req, err := http.NewRequest("POST", @@ -184,7 +192,7 @@ func (s *ServerRequiredSuite) TestResponseViaHeader(c *C) { } func (s *ServerRequiredSuite) TestLoopDetection(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() sr := map[string]string{ @@ -202,7 +210,7 @@ func (s *ServerRequiredSuite) TestLoopDetection(c *C) { } func (s *ServerRequiredSuite) TestStorageClassesHeader(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() // Set up fake keepstore to record request headers @@ -228,8 +236,30 @@ func (s *ServerRequiredSuite) TestStorageClassesHeader(c *C) { c.Check(hdr.Get("X-Keep-Storage-Classes"), Equals, "secure") } +func (s *ServerRequiredSuite) TestStorageClassesConfirmedHeader(c *C) { + runProxy(c, false, false) + defer closeListener() + + content := []byte("foo") + hash := fmt.Sprintf("%x", md5.Sum(content)) + client := &http.Client{} + + req, err := http.NewRequest("PUT", + fmt.Sprintf("http://%s/%s", listener.Addr().String(), hash), + bytes.NewReader(content)) + c.Assert(err, IsNil) + req.Header.Set("X-Keep-Storage-Classes", "default") + req.Header.Set("Authorization", "OAuth2 "+arvadostest.ActiveToken) + req.Header.Set("Content-Type", "application/octet-stream") + + resp, err := client.Do(req) + c.Assert(err, IsNil) + c.Assert(resp.StatusCode, Equals, http.StatusOK) + c.Assert(resp.Header.Get("X-Keep-Storage-Classes-Confirmed"), Equals, "default=2") +} + func (s *ServerRequiredSuite) TestDesiredReplicas(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() content := []byte("TestDesiredReplicas") @@ -246,7 +276,7 @@ func (s *ServerRequiredSuite) TestDesiredReplicas(c *C) { } func (s *ServerRequiredSuite) 
TestPutWrongContentLength(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() content := []byte("TestPutWrongContentLength") @@ -257,7 +287,8 @@ func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) { // fixes the invalid Content-Length header. In order to test // our server behavior, we have to call the handler directly // using an httptest.ResponseRecorder. - rtr := MakeRESTRouter(kc, 10*time.Second, "") + rtr, err := MakeRESTRouter(kc, 10*time.Second, &arvados.Cluster{}, log.New()) + c.Assert(err, check.IsNil) type testcase struct { sendLength string @@ -285,7 +316,7 @@ func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) { } func (s *ServerRequiredSuite) TestManyFailedPuts(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() router.(*proxyHandler).timeout = time.Nanosecond @@ -312,7 +343,7 @@ func (s *ServerRequiredSuite) TestManyFailedPuts(c *C) { } func (s *ServerRequiredSuite) TestPutAskGet(c *C) { - kc := runProxy(c, false, false) + kc, logbuf := runProxy(c, false, false, nil) defer closeListener() hash := fmt.Sprintf("%x", md5.Sum([]byte("foo"))) @@ -348,6 +379,9 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) { c.Check(rep, Equals, 2) c.Check(err, Equals, nil) c.Log("Finished PutB (expected success)") + + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block upload" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`) + logbuf.Reset() } { @@ -355,6 +389,8 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) { c.Assert(err, Equals, nil) c.Check(blocklen, Equals, int64(3)) c.Log("Finished Ask (expected success)") + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`) + logbuf.Reset() } { @@ -365,6 +401,8 @@ func (s 
*ServerRequiredSuite) TestPutAskGet(c *C) { c.Check(all, DeepEquals, []byte("foo")) c.Check(blocklen, Equals, int64(3)) c.Log("Finished Get (expected success)") + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`) + logbuf.Reset() } { @@ -389,7 +427,7 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) { } func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) { - kc := runProxy(c, true, false) + kc, _ := runProxy(c, true, false, nil) defer closeListener() hash := fmt.Sprintf("%x+3", md5.Sum([]byte("bar"))) @@ -404,18 +442,116 @@ func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) { blocklen, _, err := kc.Ask(hash) c.Check(err, FitsTypeOf, &keepclient.ErrNotFound{}) - c.Check(err, ErrorMatches, ".*not found.*") + c.Check(err, ErrorMatches, ".*HTTP 403.*") c.Check(blocklen, Equals, int64(0)) _, blocklen, _, err = kc.Get(hash) c.Check(err, FitsTypeOf, &keepclient.ErrNotFound{}) - c.Check(err, ErrorMatches, ".*not found.*") + c.Check(err, ErrorMatches, ".*HTTP 403.*") c.Check(blocklen, Equals, int64(0)) +} + +func testPermission(c *C, admin bool, perm arvados.UploadDownloadPermission) { + kp := arvados.UploadDownloadRolePermissions{} + if admin { + kp.Admin = perm + kp.User = arvados.UploadDownloadPermission{Upload: true, Download: true} + } else { + kp.Admin = arvados.UploadDownloadPermission{Upload: true, Download: true} + kp.User = perm + } + + kc, logbuf := runProxy(c, false, false, &kp) + defer closeListener() + if admin { + kc.Arvados.ApiToken = arvadostest.AdminToken + } else { + kc.Arvados.ApiToken = arvadostest.ActiveToken + } + + hash := fmt.Sprintf("%x", md5.Sum([]byte("foo"))) + var hash2 string + + { + var rep int + var err error + hash2, rep, err = kc.PutB([]byte("foo")) + + if perm.Upload { + c.Check(hash2, Matches, fmt.Sprintf(`^%s\+3(\+.+)?$`, hash)) + c.Check(rep, Equals, 2) + c.Check(err, Equals, nil) 
+ c.Log("Finished PutB (expected success)") + if admin { + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block upload" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`) + } else { + + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block upload" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="Active User" user_uuid=zzzzz-tpzed-xurymjxw79nv3jz.*`) + } + } else { + c.Check(hash2, Equals, "") + c.Check(rep, Equals, 0) + c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError(errors.New(""))) + } + logbuf.Reset() + } + if perm.Upload { + // can't test download without upload. + + reader, blocklen, _, err := kc.Get(hash2) + if perm.Download { + c.Assert(err, Equals, nil) + all, err := ioutil.ReadAll(reader) + c.Check(err, IsNil) + c.Check(all, DeepEquals, []byte("foo")) + c.Check(blocklen, Equals, int64(3)) + c.Log("Finished Get (expected success)") + if admin { + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`) + } else { + c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="Active User" user_uuid=zzzzz-tpzed-xurymjxw79nv3jz.*`) + } + } else { + c.Check(err, FitsTypeOf, &keepclient.ErrNotFound{}) + c.Check(err, ErrorMatches, ".*Missing or invalid Authorization header, or method not allowed.*") + c.Check(blocklen, Equals, int64(0)) + } + logbuf.Reset() + } } +func (s *ServerRequiredSuite) TestPutGetPermission(c *C) { + + for _, adminperm := range []bool{true, false} { + for _, userperm := range []bool{true, false} { + + testPermission(c, true, + arvados.UploadDownloadPermission{ + Upload: adminperm, + Download: true, + }) + testPermission(c, true, + arvados.UploadDownloadPermission{ + Upload: true, + Download: adminperm, + }) + testPermission(c, false, + 
arvados.UploadDownloadPermission{ + Upload: true, + Download: userperm, + }) + testPermission(c, false, + arvados.UploadDownloadPermission{ + Upload: true, + Download: userperm, + }) + } + } +} + func (s *ServerRequiredSuite) TestCorsHeaders(c *C) { - runProxy(c, false, false) + runProxy(c, false, false, nil) defer closeListener() { @@ -446,7 +582,7 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) { } func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) { - runProxy(c, false, false) + runProxy(c, false, false, nil) defer closeListener() { @@ -504,7 +640,7 @@ func (s *ServerRequiredConfigYmlSuite) TestGetIndex(c *C) { } func getIndexWorker(c *C, useConfig bool) { - kc := runProxy(c, false, useConfig) + kc, _ := runProxy(c, false, useConfig, nil) defer closeListener() // Put "index-data" blocks @@ -567,7 +703,7 @@ func getIndexWorker(c *C, useConfig bool) { } func (s *ServerRequiredSuite) TestCollectionSharingToken(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() hash, _, err := kc.PutB([]byte("shareddata")) c.Check(err, IsNil) @@ -580,7 +716,7 @@ func (s *ServerRequiredSuite) TestCollectionSharingToken(c *C) { } func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() // Put a test block @@ -608,16 +744,16 @@ func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) { _, _, _, err = kc.Get(hash) c.Assert(err, FitsTypeOf, &keepclient.ErrNotFound{}) c.Check(err.(*keepclient.ErrNotFound).Temporary(), Equals, false) - c.Check(err, ErrorMatches, ".*HTTP 403 \"Missing or invalid Authorization header\".*") + c.Check(err, ErrorMatches, ".*HTTP 403 \"Missing or invalid Authorization header, or method not allowed\".*") } _, _, err = kc.PutB([]byte("foo")) - c.Check(err, ErrorMatches, ".*403.*Missing or invalid Authorization header") + c.Check(err, ErrorMatches, ".*403.*Missing or invalid Authorization 
header, or method not allowed") } } func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() // Point keepproxy at a non-existent keepstore @@ -643,7 +779,7 @@ func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) { } func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() hash := fmt.Sprintf("%x", md5.Sum([]byte("foo"))) @@ -666,10 +802,11 @@ func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) { } func (s *ServerRequiredSuite) TestPing(c *C) { - kc := runProxy(c, false, false) + kc, _ := runProxy(c, false, false, nil) defer closeListener() - rtr := MakeRESTRouter(kc, 10*time.Second, arvadostest.ManagementToken) + rtr, err := MakeRESTRouter(kc, 10*time.Second, &arvados.Cluster{ManagementToken: arvadostest.ManagementToken}, log.New()) + c.Assert(err, check.IsNil) req, err := http.NewRequest("GET", "http://"+listener.Addr().String()+"/_health/ping", diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts index f5e40ff153..17b7b88884 100644 --- a/tools/salt-install/local.params.example.multiple_hosts +++ b/tools/salt-install/local.params.example.multiple_hosts @@ -82,6 +82,7 @@ LE_AWS_SECRET_ACCESS_KEY="thisistherandomstringthatisyoursecretkey" # Extra states to apply. If you use your own subdir, change this value accordingly # EXTRA_STATES_DIR="${CONFIG_DIR}/states" +# These are ARVADOS-related settings. # Which release of Arvados repo you want to use RELEASE="production" # Which version of Arvados you want to install. Defaults to latest stable @@ -90,13 +91,13 @@ RELEASE="production" # This is an arvados-formula setting. 
# If branch is set, the script will switch to it before running salt # Usually not needed, only used for testing -# BRANCH="master" +# BRANCH="main" ########################################################## # Usually there's no need to modify things below this line # Formulas versions -# ARVADOS_TAG="v1.1.4" +# ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" # DOCKER_TAG="v1.0.0" diff --git a/tools/salt-install/local.params.example.single_host_multiple_hostnames b/tools/salt-install/local.params.example.single_host_multiple_hostnames index 6dd47722c1..ae54e7437a 100644 --- a/tools/salt-install/local.params.example.single_host_multiple_hostnames +++ b/tools/salt-install/local.params.example.single_host_multiple_hostnames @@ -54,6 +54,7 @@ USE_LETSENCRYPT="no" # Extra states to apply. If you use your own subdir, change this value accordingly # EXTRA_STATES_DIR="${CONFIG_DIR}/states" +# These are ARVADOS-related settings. # Which release of Arvados repo you want to use RELEASE="production" # Which version of Arvados you want to install. Defaults to latest stable @@ -62,13 +63,13 @@ RELEASE="production" # This is an arvados-formula setting. 
# If branch is set, the script will switch to it before running salt # Usually not needed, only used for testing -# BRANCH="master" +# BRANCH="main" ########################################################## # Usually there's no need to modify things below this line # Formulas versions -# ARVADOS_TAG="v1.1.4" +# ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" # DOCKER_TAG="v1.0.0" diff --git a/tools/salt-install/local.params.example.single_host_single_hostname b/tools/salt-install/local.params.example.single_host_single_hostname index fda42a9c74..a35bd45bff 100644 --- a/tools/salt-install/local.params.example.single_host_single_hostname +++ b/tools/salt-install/local.params.example.single_host_single_hostname @@ -63,6 +63,7 @@ USE_LETSENCRYPT="no" # Extra states to apply. If you use your own subdir, change this value accordingly # EXTRA_STATES_DIR="${CONFIG_DIR}/states" +# These are ARVADOS-related settings. # Which release of Arvados repo you want to use RELEASE="production" # Which version of Arvados you want to install. Defaults to latest stable @@ -71,13 +72,13 @@ RELEASE="production" # This is an arvados-formula setting. # If branch is set, the script will switch to it before running salt # Usually not needed, only used for testing -# BRANCH="master" +# BRANCH="main" ########################################################## # Usually there's no need to modify things below this line # Formulas versions -# ARVADOS_TAG="v1.1.4" +# ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" # DOCKER_TAG="v1.0.0" diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh index c1af511ad4..7ac120e5fd 100755 --- a/tools/salt-install/provision.sh +++ b/tools/salt-install/provision.sh @@ -1,4 +1,4 @@ -#!/bin/bash -x +#!/usr/bin/env bash # Copyright (C) The Arvados Authors. All rights reserved. 
# @@ -21,7 +21,6 @@ usage() { echo >&2 echo >&2 "${0} options:" echo >&2 " -d, --debug Run salt installation in debug mode" - echo >&2 " -p , --ssl-port SSL port to use for the web applications" echo >&2 " -c , --config Path to the local.params config file" echo >&2 " -t, --test Test installation running a CWL workflow" echo >&2 " -r, --roles List of Arvados roles to apply to the host, comma separated" @@ -39,17 +38,35 @@ usage() { echo >&2 " workbench2" echo >&2 " Defaults to applying them all" echo >&2 " -h, --help Display this help and exit" + echo >&2 " --dump-config Dumps the pillars and states to a directory" + echo >&2 " This parameter does not perform any installation at all. It's" + echo >&2 " intended to give you a parsed sot of configuration files so" + echo >&2 " you can inspect them or use them in you Saltstack infrastructure." + echo >&2 " It" + echo >&2 " - parses the pillar and states templates," + echo >&2 " - downloads the helper formulas with their desired versions," + echo >&2 " - prepares the 'top.sls' files both for pillars and states" + echo >&2 " for the selected role/s" + echo >&2 " - writes the resulting files into " echo >&2 " -v, --vagrant Run in vagrant and use the /vagrant shared dir" echo >&2 } arguments() { # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros). + if ! which getopt > /dev/null; then + echo >&2 "GNU getopt is required to run this script. Please install it and re-reun it" + exit 1 + fi + TEMP=$(getopt -o c:dhp:r:tv \ - --long config:,debug,help,ssl-port:,roles:,test,vagrant \ + --long config:,debug,dump-config:,help,roles:,test,vagrant \ -n "${0}" -- "${@}") - if [ ${?} != 0 ] ; then echo "GNU getopt missing? Use -h for help"; exit 1 ; fi + if [ ${?} != 0 ]; + then echo "Please check the parameters you entered and re-run again" + exit 1 + fi # Note the quotes around `$TEMP': they are essential! 
eval set -- "$TEMP" @@ -62,9 +79,23 @@ arguments() { -d | --debug) LOG_LEVEL="debug" shift + set -x ;; - -p | --ssl-port) - CONTROLLER_EXT_SSL_PORT=${2} + --dump-config) + if [[ ${2} = /* ]]; then + DUMP_SALT_CONFIG_DIR=${2} + else + DUMP_SALT_CONFIG_DIR=${PWD}/${2} + fi + ## states + S_DIR="${DUMP_SALT_CONFIG_DIR}/salt" + ## formulas + F_DIR="${DUMP_SALT_CONFIG_DIR}/formulas" + ## pillars + P_DIR="${DUMP_SALT_CONFIG_DIR}/pillars" + ## tests + T_DIR="${DUMP_SALT_CONFIG_DIR}/tests" + DUMP_CONFIG="yes" shift 2 ;; -r | --roles) @@ -102,6 +133,7 @@ arguments() { CONFIG_FILE="${SCRIPT_DIR}/local.params" CONFIG_DIR="local_config_dir" +DUMP_CONFIG="no" LOG_LEVEL="info" CONTROLLER_EXT_SSL_PORT=443 TESTS_DIR="tests" @@ -127,15 +159,20 @@ WEBSOCKET_EXT_SSL_PORT=8002 WORKBENCH1_EXT_SSL_PORT=443 WORKBENCH2_EXT_SSL_PORT=3001 +## These are ARVADOS-related parameters # For a stable release, change RELEASE "production" and VERSION to the # package version (including the iteration, e.g. X.Y.Z-1) of the # release. +# The "local.params.example.*" files already set "RELEASE=production" +# to deploy production-ready packages RELEASE="development" VERSION="latest" -# The arvados-formula version. For a stable release, this should be a +# These are arvados-formula-related parameters +# An arvados-formula tag. For a stable release, this should be a # branch name (e.g. X.Y-dev) or tag for the release. 
-ARVADOS_TAG="master" +# ARVADOS_TAG="2.2.0" +# BRANCH="main" # Other formula versions we depend on POSTGRES_TAG="v0.41.6" @@ -145,26 +182,31 @@ LOCALE_TAG="v0.3.4" LETSENCRYPT_TAG="v2.1.0" # Salt's dir +DUMP_SALT_CONFIG_DIR="" ## states S_DIR="/srv/salt" ## formulas F_DIR="/srv/formulas" -##pillars +## pillars P_DIR="/srv/pillars" +## tests +T_DIR="/tmp/cluster_tests" arguments ${@} if [ -s ${CONFIG_FILE} ]; then source ${CONFIG_FILE} else - echo >&2 "Please create a '${CONFIG_FILE}' file with initial values, as described in" + echo >&2 "You don't seem to have a config file with initial values." + echo >&2 "Please create a '${CONFIG_FILE}' file as described in" echo >&2 " * https://doc.arvados.org/install/salt-single-host.html#single_host, or" echo >&2 " * https://doc.arvados.org/install/salt-multi-host.html#multi_host_multi_hostnames" exit 1 fi if [ ! -d ${CONFIG_DIR} ]; then - echo >&2 "Please create a '${CONFIG_DIR}' with initial values, as described in" + echo >&2 "You don't seem to have a config directory with pillars and states." + echo >&2 "Please create a '${CONFIG_DIR}' directory (as configured in your '${CONFIG_FILE}'). Please see" echo >&2 " * https://doc.arvados.org/install/salt-single-host.html#single_host, or" echo >&2 " * https://doc.arvados.org/install/salt-multi-host.html#multi_host_multi_hostnames" exit 1 @@ -176,7 +218,7 @@ if grep -q 'fixme_or_this_wont_work' ${CONFIG_FILE} ; then exit 1 fi -if ! grep -E '^[[:alnum:]]{5}$' <<<${CLUSTER} ; then +if ! 
grep -qE '^[[:alnum:]]{5}$' <<<${CLUSTER} ; then echo >&2 "ERROR: must be exactly 5 alphanumeric characters long" echo >&2 "Fix the cluster name in the 'local.params' file and re-run the provision script" exit 1 @@ -187,20 +229,23 @@ if [ "x${HOSTNAME_EXT}" = "x" ] ; then HOSTNAME_EXT="${CLUSTER}.${DOMAIN}" fi -apt-get update -apt-get install -y curl git jq - -if which salt-call; then - echo "Salt already installed" +if [ "${DUMP_CONFIG}" = "yes" ]; then + echo "The provision installer will just dump a config under ${DUMP_SALT_CONFIG_DIR} and exit" else - curl -L https://bootstrap.saltstack.com -o /tmp/bootstrap_salt.sh - sh /tmp/bootstrap_salt.sh -XdfP -x python3 - /bin/systemctl stop salt-minion.service - /bin/systemctl disable salt-minion.service -fi + apt-get update + apt-get install -y curl git jq + + if which salt-call; then + echo "Salt already installed" + else + curl -L https://bootstrap.saltstack.com -o /tmp/bootstrap_salt.sh + sh /tmp/bootstrap_salt.sh -XdfP -x python3 + /bin/systemctl stop salt-minion.service + /bin/systemctl disable salt-minion.service + fi -# Set salt to masterless mode -cat > /etc/salt/minion << EOFSM + # Set salt to masterless mode + cat > /etc/salt/minion << EOFSM file_client: local file_roots: base: @@ -211,24 +256,36 @@ pillar_roots: base: - ${P_DIR} EOFSM +fi -mkdir -p ${S_DIR} ${F_DIR} ${P_DIR} +mkdir -p ${S_DIR} ${F_DIR} ${P_DIR} ${T_DIR} # Get the formula and dependencies cd ${F_DIR} || exit 1 -git clone --branch "${ARVADOS_TAG}" https://git.arvados.org/arvados-formula.git -git clone --branch "${DOCKER_TAG}" https://github.com/saltstack-formulas/docker-formula.git -git clone --branch "${LOCALE_TAG}" https://github.com/saltstack-formulas/locale-formula.git -# git clone --branch "${NGINX_TAG}" https://github.com/saltstack-formulas/nginx-formula.git -git clone --branch "${NGINX_TAG}" https://github.com/netmanagers/nginx-formula.git -git clone --branch "${POSTGRES_TAG}" https://github.com/saltstack-formulas/postgres-formula.git 
-git clone --branch "${LETSENCRYPT_TAG}" https://github.com/saltstack-formulas/letsencrypt-formula.git +echo "Cloning formulas" +rm -rf ${F_DIR}/* || exit 1 +git clone --quiet https://github.com/saltstack-formulas/docker-formula.git ${F_DIR}/docker +( cd docker && git checkout --quiet tags/"${DOCKER_TAG}" -b "${DOCKER_TAG}" ) + +git clone --quiet https://github.com/saltstack-formulas/locale-formula.git ${F_DIR}/locale +( cd locale && git checkout --quiet tags/"${LOCALE_TAG}" -b "${LOCALE_TAG}" ) + +git clone --quiet https://github.com/netmanagers/nginx-formula.git ${F_DIR}/nginx +( cd nginx && git checkout --quiet tags/"${NGINX_TAG}" -b "${NGINX_TAG}" ) + +git clone --quiet https://github.com/saltstack-formulas/postgres-formula.git ${F_DIR}/postgres +( cd postgres && git checkout --quiet tags/"${POSTGRES_TAG}" -b "${POSTGRES_TAG}" ) + +git clone --quiet https://github.com/saltstack-formulas/letsencrypt-formula.git ${F_DIR}/letsencrypt +( cd letsencrypt && git checkout --quiet tags/"${LETSENCRYPT_TAG}" -b "${LETSENCRYPT_TAG}" ) + +git clone --quiet https://git.arvados.org/arvados-formula.git ${F_DIR}/arvados # If we want to try a specific branch of the formula if [ "x${BRANCH}" != "x" ]; then - cd ${F_DIR}/arvados-formula || exit 1 - git checkout -t origin/"${BRANCH}" -b "${BRANCH}" - cd - + ( cd ${F_DIR}/arvados && git checkout --quiet -t origin/"${BRANCH}" -b "${BRANCH}" ) +elif [ "x${ARVADOS_TAG}" != "x" ]; then +( cd ${F_DIR}/arvados && git checkout --quiet tags/"${ARVADOS_TAG}" -b "${ARVADOS_TAG}" ) fi if [ "x${VAGRANT}" = "xyes" ]; then @@ -243,6 +300,8 @@ fi SOURCE_STATES_DIR="${EXTRA_STATES_DIR}" +echo "Writing pillars and states" + # Replace variables (cluster, domain, etc) in the pillars, states and tests # to ease deployment for newcomers if [ ! -d "${SOURCE_PILLARS_DIR}" ]; then @@ -294,7 +353,7 @@ if [ "x${TEST}" = "xyes" ] && [ ! 
-d "${SOURCE_TESTS_DIR}" ]; then echo "You requested to run tests, but ${SOURCE_TESTS_DIR} does not exist or is not a directory. Exiting." exit 1 fi -mkdir -p /tmp/cluster_tests +mkdir -p ${T_DIR} # Replace cluster and domain name in the test files for f in $(ls "${SOURCE_TESTS_DIR}"/*); do sed "s#__CLUSTER__#${CLUSTER}#g; @@ -306,9 +365,9 @@ for f in $(ls "${SOURCE_TESTS_DIR}"/*); do s#__INITIAL_USER__#${INITIAL_USER}#g; s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g; s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g" \ - "${f}" > "/tmp/cluster_tests"/$(basename "${f}") + "${f}" > ${T_DIR}/$(basename "${f}") done -chmod 755 /tmp/cluster_tests/run-test.sh +chmod 755 ${T_DIR}/run-test.sh # Replace helper state files that differ from the formula's examples if [ -d "${SOURCE_STATES_DIR}" ]; then @@ -500,6 +559,11 @@ else done fi +if [ "${DUMP_CONFIG}" = "yes" ]; then + # We won't run the rest of the script because we're just dumping the config + exit 0 +fi + # FIXME! #16992 Temporary fix for psql call in arvados-api-server if [ -e /root/.psqlrc ]; then if ! 
( grep 'pset pager off' /root/.psqlrc ); then @@ -542,6 +606,6 @@ fi # Test that the installation finished correctly if [ "x${TEST}" = "xyes" ]; then - cd /tmp/cluster_tests + cd ${T_DIR} ./run-test.sh fi diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py index 959f16d898..997da57e05 100755 --- a/tools/user-activity/arvados_user_activity/main.py +++ b/tools/user-activity/arvados_user_activity/main.py @@ -41,6 +41,13 @@ def getuserinfo(arv, uuid): arv.config()["Services"]["Workbench1"]["ExternalURL"], uuid, prof) +collectionNameCache = {} +def getCollectionName(arv, uuid): + if uuid not in collectionNameCache: + u = arv.collections().get(uuid=uuid).execute() + collectionNameCache[uuid] = u["name"] + return collectionNameCache[uuid] + def getname(u): return "\"%s\" (%s)" % (u["name"], u["uuid"]) @@ -137,6 +144,19 @@ def main(arguments=None): else: users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid)) + elif e["event_type"] == "file_download": + users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at, + e["properties"].get("collection_file_path") or e["properties"].get("reqPath"), + getCollectionName(arv, e["properties"].get("collection_uuid")), + e["properties"].get("collection_uuid"), + e["properties"].get("portable_data_hash"))) + + elif e["event_type"] == "file_upload": + users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at, + e["properties"].get("collection_file_path") or e["properties"].get("reqPath"), + getCollectionName(arv, e["properties"].get("collection_uuid")), + e["properties"].get("collection_uuid"))) + else: users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))