From: Ward Vandewege Date: Fri, 3 Sep 2021 14:03:20 +0000 (-0400) Subject: 17755: Merge branch 'main' into 17755-add-singularity-to-compute-image X-Git-Tag: 2.3.0~87^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/72d7d41944006d1f48f570784dafe56b9812b0c8?hp=7af0535d3b0d7960152b06b7211c26bfd7b208cb 17755: Merge branch 'main' into 17755-add-singularity-to-compute-image Arvados-DCO-1.1-Signed-off-by: Ward Vandewege --- diff --git a/apps/workbench/test/controllers/work_units_controller_test.rb b/apps/workbench/test/controllers/work_units_controller_test.rb index 6f74955cd1..0191c7f0df 100644 --- a/apps/workbench/test/controllers/work_units_controller_test.rb +++ b/apps/workbench/test/controllers/work_units_controller_test.rb @@ -13,26 +13,26 @@ class WorkUnitsControllerTest < ActionController::TestCase [ ['foo', 10, 25, ['/pipeline_instances/zzzzz-d1hrv-1xfj6xkicf2muk2', - '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7'], ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2']], ['pipeline_with_tagged_collection_input', 1, 1, ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3'], - ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-pshmckwoma9plh7', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2']], ['no_such_match', 0, 0, [], - ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-pshmckwoma9plh7', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2']], ].each do |search_filter, expected_min, expected_max, expected, not_expected| test "all_processes page for search filter '#{search_filter}'" do - work_units_index(filters: [['any','@@', search_filter]], show_children: true) + work_units_index(filters: [['any','ilike', "%#{search_filter}%"]], show_children: true) assert_response :success # Verify that expected number of processes are found diff --git a/apps/workbench/test/integration/work_units_test.rb b/apps/workbench/test/integration/work_units_test.rb index 4f2ebbc554..36b29468ff 100644 --- a/apps/workbench/test/integration/work_units_test.rb +++ b/apps/workbench/test/integration/work_units_test.rb @@ -14,7 +14,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest [[true, 25, 100, ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3', - '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7', '/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf', '/container_requests/zzzzz-xvhdp-cr4completedcr2', @@ -23,7 +23,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest '/container_requests/zzzzz-xvhdp-oneof60crs00001']], [false, 25, 100, ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3', - '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts', + '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4', '/container_requests/zzzzz-xvhdp-cr4completedcr2'], ['/pipeline_instances/zzzzz-d1hrv-scarxiyajtshq3l', '/container_requests/zzzzz-xvhdp-oneof60crs00001', diff --git a/build/run-build-packages-one-target.sh b/build/run-build-packages-one-target.sh index 81aac9c616..7a91cb4de1 100755 --- a/build/run-build-packages-one-target.sh +++ b/build/run-build-packages-one-target.sh @@ -110,6 +110,7 @@ while [ $# -gt 0 ]; do echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$" exit 1 else + [[ "$2" =~ (.*)-(.*) ]] ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}" ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}" fi diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid index 8435e2871f..9e7410260f 100644 --- a/doc/admin/upgrading.html.textile.liquid +++ b/doc/admin/upgrading.html.textile.liquid @@ -39,6 +39,10 @@ h2(#main). development main (as of 2021-07-15) "Upgrading from 2.2.0":#v2_2_0 +h3. Removed deprecated '@@' search operator + +The '@@' full text search operator, previously deprecated, has been removed. To perform a string search across multiple columns, use the 'ilike' operator on 'any' column as described in the "available list method filter section":{{site.baseurl}}/api/methods.html#substringsearchfilter of the API documentation. + h3. Storage classes must be defined explicitly If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details. diff --git a/doc/api/methods.html.textile.liquid b/doc/api/methods.html.textile.liquid index c6e4ba00a7..e051ab66fa 100644 --- a/doc/api/methods.html.textile.liquid +++ b/doc/api/methods.html.textile.liquid @@ -96,16 +96,20 @@ table(table table-bordered table-condensed). |1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@| |2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@| -The following operators are available.[1] +The following operators are available. table(table table-bordered table-condensed). |_. Operator|_. Operand type|_. Description|_. Example| -|@=@, @!=@|string, number, timestamp, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ @["tail_uuid","!=",null]@| +|@=@, @!=@, @<>@|string, number, timestamp, JSON-encoded array, JSON-encoded object, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ +@["tail_uuid","!=",null]@ +@["storage_classes_desired","=","[\"default\"]"]@| |@<@, @<=@, @>=@, @>@|string, number, or timestamp|Ordering comparison|@["script_version",">","123"]@| |@like@, @ilike@|string|SQL pattern match. Single character match is @_@ and wildcard is @%@. The @ilike@ operator is case-insensitive|@["script_version","like","d00220fb%"]@| |@in@, @not in@|array of strings|Set membership|@["script_version","in",["main","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@| |@is_a@|string|Arvados object type|@["head_uuid","is_a","arvados#collection"]@| -|@exists@|string|Test if a subproperty is present.|@["properties","exists","my_subproperty"]@| +|@exists@|string|Presence of subproperty|@["properties","exists","my_subproperty"]@| +|@contains@|string, array of strings|Presence of one or more keys or array elements|@["storage_classes_desired", "contains", ["foo", "bar"]]@ (matches both @["foo", "bar"]@ and @["foo", "bar", "baz"]@) +(note @[..., "contains", "foo"]@ is also accepted, and is equivalent to @[..., "contains", ["foo"]]@)| h4(#substringsearchfilter). Filtering using substring search @@ -128,7 +132,7 @@ table(table table-bordered table-condensed). |@like@, @ilike@|string|SQL pattern match, single character match is @_@ and wildcard is @%@, ilike is case-insensitive|@["properties.my_subproperty", "like", "d00220fb%"]@| |@in@, @not in@|array of strings|Set membership|@["properties.my_subproperty", "in", ["fizz", "buzz"]]@| |@exists@|boolean|Test if a subproperty is present or not (determined by operand).|@["properties.my_subproperty", "exists", true]@| -|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.| +|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["properties.foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.| Note that exclusion filters @!=@ and @not in@ will return records for which the property is not defined at all. To restrict filtering to records on which the subproperty is defined, combine with an @exists@ filter. @@ -167,5 +171,3 @@ table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location | {background:#ccffcc}.|uuid|string|The UUID of the resource in question.|path|| |{resource_type}|object||query|| - -fn1^. NOTE: The filter operator for full-text search (@@) which previously worked (but was undocumented) is deprecated and will be removed in a future release. diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid index fd4a36f291..6c1cc691c3 100644 --- a/doc/api/methods/collections.html.textile.liquid +++ b/doc/api/methods/collections.html.textile.liquid @@ -32,7 +32,10 @@ table(table table-bordered table-condensed). |manifest_text|text||| |replication_desired|number|Minimum storage replication level desired for each data block referenced by this collection. A value of @null@ signifies that the site default replication level (typically 2) is desired.|@2@| |replication_confirmed|number|Replication level most recently confirmed by the storage system. This field is null when a collection is first created, and is reset to null when the manifest_text changes in a way that introduces a new data block. An integer value indicates the replication level of the _least replicated_ data block in the collection.|@2@, null| -|replication_confirmed_at|datetime|When replication_confirmed was confirmed. If replication_confirmed is null, this field is also null.|| +|replication_confirmed_at|datetime|When @replication_confirmed@ was confirmed. If @replication_confirmed@ is null, this field is also null.|| +|storage_classes_desired|list|An optional list of storage class names where the blocks should be saved. If not provided, the cluster's default storage class(es) will be set.|@['archival']@| +|storage_classes_confirmed|list|Storage classes most recently confirmed by the storage system. This field is an empty list when a collection is first created.|@'archival']@, @[]@| +|storage_classes_confirmed_at|datetime|When @storage_classes_confirmed@ was confirmed. If @storage_classes_confirmed@ is @[]@, this field is null.|| |trash_at|datetime|If @trash_at@ is non-null and in the past, this collection will be hidden from API calls. May be untrashed.|| |delete_at|datetime|If @delete_at@ is non-null and in the past, the collection may be permanently deleted.|| |is_trashed|boolean|True if @trash_at@ is in the past, false if not.|| diff --git a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid index 66b562d131..301fd7306a 100644 --- a/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid +++ b/doc/install/crunch2-lsf/install-dispatch.html.textile.liquid @@ -62,10 +62,12 @@ When arvados-dispatch-lsf invokes @bsub@, you can add arguments to the command b
    Containers:
       LSF:
-        BsubArgumentsList: ["-C", "0"]
+        BsubArgumentsList: ["-C", "0", "-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"]
 
+Note that the default value for @BsubArgumentsList@ uses the @-o@ and @-e@ arguments to write stdout/stderr data to files in @/tmp@ on the compute nodes, which is helpful for troubleshooting installation/configuration problems. Ensure you have something in place to delete old files from @/tmp@, or adjust these arguments accordingly. + h3(#PollPeriod). Containers.PollInterval diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index ec32613905..ff48d48f41 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -1026,7 +1026,12 @@ Clusters: LSF: # Additional arguments to bsub when submitting Arvados # containers as LSF jobs. - BsubArgumentsList: [] + # + # Note that the default arguments cause LSF to write two files + # in /tmp on the compute node each time an Arvados container + # runs. Ensure you have something in place to delete old files + # from /tmp, or adjust these arguments accordingly. + BsubArgumentsList: ["-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"] # Use sudo to switch to this user account when submitting LSF # jobs. diff --git a/lib/config/export.go b/lib/config/export.go index 065011cc2e..92e2d7b4d5 100644 --- a/lib/config/export.go +++ b/lib/config/export.go @@ -105,7 +105,7 @@ var whitelist = map[string]bool{ "Collections.PreserveVersionIfIdle": true, "Collections.S3FolderObjects": true, "Collections.TrashSweepInterval": false, - "Collections.TrustAllContent": false, + "Collections.TrustAllContent": true, "Collections.WebDAVCache": false, "Collections.KeepproxyPermission": false, "Collections.WebDAVPermission": false, diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 36a21d4c35..097396a0ae 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -1032,7 +1032,12 @@ Clusters: LSF: # Additional arguments to bsub when submitting Arvados # containers as LSF jobs. - BsubArgumentsList: [] + # + # Note that the default arguments cause LSF to write two files + # in /tmp on the compute node each time an Arvados container + # runs. Ensure you have something in place to delete old files + # from /tmp, or adjust these arguments accordingly. + BsubArgumentsList: ["-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"] # Use sudo to switch to this user account when submitting LSF # jobs. diff --git a/lib/controller/fed_collections.go b/lib/controller/fed_collections.go deleted file mode 100644 index a0a123129f..0000000000 --- a/lib/controller/fed_collections.go +++ /dev/null @@ -1,312 +0,0 @@ -// Copyright (C) The Arvados Authors. All rights reserved. -// -// SPDX-License-Identifier: AGPL-3.0 - -package controller - -import ( - "bufio" - "bytes" - "context" - "crypto/md5" - "encoding/json" - "fmt" - "io" - "io/ioutil" - "net/http" - "strings" - "sync" - - "git.arvados.org/arvados.git/sdk/go/arvados" - "git.arvados.org/arvados.git/sdk/go/httpserver" - "git.arvados.org/arvados.git/sdk/go/keepclient" -) - -func rewriteSignatures(clusterID string, expectHash string, - resp *http.Response, requestError error) (newResponse *http.Response, err error) { - - if requestError != nil { - return resp, requestError - } - - if resp.StatusCode != http.StatusOK { - return resp, nil - } - - originalBody := resp.Body - defer originalBody.Close() - - var col arvados.Collection - err = json.NewDecoder(resp.Body).Decode(&col) - if err != nil { - return nil, err - } - - // rewriting signatures will make manifest text 5-10% bigger so calculate - // capacity accordingly - updatedManifest := bytes.NewBuffer(make([]byte, 0, int(float64(len(col.ManifestText))*1.1))) - - hasher := md5.New() - mw := io.MultiWriter(hasher, updatedManifest) - sz := 0 - - scanner := bufio.NewScanner(strings.NewReader(col.ManifestText)) - scanner.Buffer(make([]byte, 1048576), len(col.ManifestText)) - for scanner.Scan() { - line := scanner.Text() - tokens := strings.Split(line, " ") - if len(tokens) < 3 { - return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line) - } - - n, err := mw.Write([]byte(tokens[0])) - if err != nil { - return nil, fmt.Errorf("Error updating manifest: %v", err) - } - sz += n - for _, token := range tokens[1:] { - n, err = mw.Write([]byte(" ")) - if err != nil { - return nil, fmt.Errorf("Error updating manifest: %v", err) - } - sz += n - - m := keepclient.SignedLocatorRe.FindStringSubmatch(token) - if m != nil { - // Rewrite the block signature to be a remote signature - _, err = fmt.Fprintf(updatedManifest, "%s%s%s+R%s-%s%s", m[1], m[2], m[3], clusterID, m[5][2:], m[8]) - if err != nil { - return nil, fmt.Errorf("Error updating manifest: %v", err) - } - - // for hash checking, ignore signatures - n, err = fmt.Fprintf(hasher, "%s%s", m[1], m[2]) - if err != nil { - return nil, fmt.Errorf("Error updating manifest: %v", err) - } - sz += n - } else { - n, err = mw.Write([]byte(token)) - if err != nil { - return nil, fmt.Errorf("Error updating manifest: %v", err) - } - sz += n - } - } - n, err = mw.Write([]byte("\n")) - if err != nil { - return nil, fmt.Errorf("Error updating manifest: %v", err) - } - sz += n - } - - // Check that expected hash is consistent with - // portable_data_hash field of the returned record - if expectHash == "" { - expectHash = col.PortableDataHash - } else if expectHash != col.PortableDataHash { - return nil, fmt.Errorf("portable_data_hash %q on returned record did not match expected hash %q ", expectHash, col.PortableDataHash) - } - - // Certify that the computed hash of the manifest_text matches our expectation - sum := hasher.Sum(nil) - computedHash := fmt.Sprintf("%x+%v", sum, sz) - if computedHash != expectHash { - return nil, fmt.Errorf("Computed manifest_text hash %q did not match expected hash %q", computedHash, expectHash) - } - - col.ManifestText = updatedManifest.String() - - newbody, err := json.Marshal(col) - if err != nil { - return nil, err - } - - buf := bytes.NewBuffer(newbody) - resp.Body = ioutil.NopCloser(buf) - resp.ContentLength = int64(buf.Len()) - resp.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len())) - - return resp, nil -} - -func filterLocalClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) { - if requestError != nil { - return resp, requestError - } - - if resp.StatusCode == http.StatusNotFound { - // Suppress returning this result, because we want to - // search the federation. - return nil, nil - } - return resp, nil -} - -type searchRemoteClusterForPDH struct { - pdh string - remoteID string - mtx *sync.Mutex - sentResponse *bool - sharedContext *context.Context - cancelFunc func() - errors *[]string - statusCode *int -} - -func fetchRemoteCollectionByUUID( - h *genericFederatedRequestHandler, - effectiveMethod string, - clusterID *string, - uuid string, - remainder string, - w http.ResponseWriter, - req *http.Request) bool { - - if effectiveMethod != "GET" { - // Only handle GET requests right now - return false - } - - if uuid != "" { - // Collection UUID GET request - *clusterID = uuid[0:5] - if *clusterID != "" && *clusterID != h.handler.Cluster.ClusterID { - // request for remote collection by uuid - resp, err := h.handler.remoteClusterRequest(*clusterID, req) - newResponse, err := rewriteSignatures(*clusterID, "", resp, err) - h.handler.proxy.ForwardResponse(w, newResponse, err) - return true - } - } - - return false -} - -func fetchRemoteCollectionByPDH( - h *genericFederatedRequestHandler, - effectiveMethod string, - clusterID *string, - uuid string, - remainder string, - w http.ResponseWriter, - req *http.Request) bool { - - if effectiveMethod != "GET" { - // Only handle GET requests right now - return false - } - - m := collectionsByPDHRe.FindStringSubmatch(req.URL.Path) - if len(m) != 2 { - return false - } - - // Request for collection by PDH. Search the federation. - - // First, query the local cluster. - resp, err := h.handler.localClusterRequest(req) - newResp, err := filterLocalClusterResponse(resp, err) - if newResp != nil || err != nil { - h.handler.proxy.ForwardResponse(w, newResp, err) - return true - } - - // Create a goroutine for each cluster in the - // RemoteClusters map. The first valid result gets - // returned to the client. When that happens, all - // other outstanding requests are cancelled - sharedContext, cancelFunc := context.WithCancel(req.Context()) - defer cancelFunc() - - req = req.WithContext(sharedContext) - wg := sync.WaitGroup{} - pdh := m[1] - success := make(chan *http.Response) - errorChan := make(chan error, len(h.handler.Cluster.RemoteClusters)) - - acquire, release := semaphore(h.handler.Cluster.API.MaxRequestAmplification) - - for remoteID := range h.handler.Cluster.RemoteClusters { - if remoteID == h.handler.Cluster.ClusterID { - // No need to query local cluster again - continue - } - if remoteID == "*" { - // This isn't a real remote cluster: it just sets defaults for unlisted remotes. - continue - } - - wg.Add(1) - go func(remote string) { - defer wg.Done() - acquire() - defer release() - select { - case <-sharedContext.Done(): - return - default: - } - - resp, err := h.handler.remoteClusterRequest(remote, req) - wasSuccess := false - defer func() { - if resp != nil && !wasSuccess { - resp.Body.Close() - } - }() - if err != nil { - errorChan <- err - return - } - if resp.StatusCode != http.StatusOK { - errorChan <- HTTPError{resp.Status, resp.StatusCode} - return - } - select { - case <-sharedContext.Done(): - return - default: - } - - newResponse, err := rewriteSignatures(remote, pdh, resp, nil) - if err != nil { - errorChan <- err - return - } - select { - case <-sharedContext.Done(): - case success <- newResponse: - wasSuccess = true - } - }(remoteID) - } - go func() { - wg.Wait() - cancelFunc() - }() - - errorCode := http.StatusNotFound - - for { - select { - case newResp = <-success: - h.handler.proxy.ForwardResponse(w, newResp, nil) - return true - case <-sharedContext.Done(): - var errors []string - for len(errorChan) > 0 { - err := <-errorChan - if httperr, ok := err.(HTTPError); !ok || httperr.Code != http.StatusNotFound { - errorCode = http.StatusBadGateway - } - errors = append(errors, err.Error()) - } - httpserver.Errors(w, errors, errorCode) - return true - } - } - - // shouldn't ever get here - return true -} diff --git a/lib/controller/fed_containers.go b/lib/controller/fed_containers.go deleted file mode 100644 index fd4f0521bc..0000000000 --- a/lib/controller/fed_containers.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (C) The Arvados Authors. All rights reserved. -// -// SPDX-License-Identifier: AGPL-3.0 - -package controller - -import ( - "bytes" - "encoding/json" - "fmt" - "io/ioutil" - "net/http" - "strings" - - "git.arvados.org/arvados.git/sdk/go/auth" - "git.arvados.org/arvados.git/sdk/go/httpserver" -) - -func remoteContainerRequestCreate( - h *genericFederatedRequestHandler, - effectiveMethod string, - clusterID *string, - uuid string, - remainder string, - w http.ResponseWriter, - req *http.Request) bool { - - if effectiveMethod != "POST" || uuid != "" || remainder != "" { - return false - } - - // First make sure supplied token is valid. - creds := auth.NewCredentials() - creds.LoadTokensFromHTTPRequest(req) - - currentUser, ok, err := h.handler.validateAPItoken(req, creds.Tokens[0]) - if err != nil { - httpserver.Error(w, err.Error(), http.StatusInternalServerError) - return true - } else if !ok { - httpserver.Error(w, "invalid API token", http.StatusForbidden) - return true - } - - if *clusterID == "" || *clusterID == h.handler.Cluster.ClusterID { - // Submitting container request to local cluster. No - // need to set a runtime_token (rails api will create - // one when the container runs) or do a remote cluster - // request. - return false - } - - if req.Header.Get("Content-Type") != "application/json" { - httpserver.Error(w, "Expected Content-Type: application/json, got "+req.Header.Get("Content-Type"), http.StatusBadRequest) - return true - } - - originalBody := req.Body - defer originalBody.Close() - var request map[string]interface{} - err = json.NewDecoder(req.Body).Decode(&request) - if err != nil { - httpserver.Error(w, err.Error(), http.StatusBadRequest) - return true - } - - crString, ok := request["container_request"].(string) - if ok { - var crJSON map[string]interface{} - err := json.Unmarshal([]byte(crString), &crJSON) - if err != nil { - httpserver.Error(w, err.Error(), http.StatusBadRequest) - return true - } - - request["container_request"] = crJSON - } - - containerRequest, ok := request["container_request"].(map[string]interface{}) - if !ok { - // Use toplevel object as the container_request object - containerRequest = request - } - - // If runtime_token is not set, create a new token - if _, ok := containerRequest["runtime_token"]; !ok { - if len(currentUser.Authorization.Scopes) != 1 || currentUser.Authorization.Scopes[0] != "all" { - httpserver.Error(w, "Token scope is not [all]", http.StatusForbidden) - return true - } - - if strings.HasPrefix(currentUser.Authorization.UUID, h.handler.Cluster.ClusterID) { - // Local user, submitting to a remote cluster. - // Create a new time-limited token. - newtok, err := h.handler.createAPItoken(req, currentUser.UUID, nil) - if err != nil { - httpserver.Error(w, err.Error(), http.StatusForbidden) - return true - } - containerRequest["runtime_token"] = newtok.TokenV2() - } else { - // Remote user. Container request will use the - // current token, minus the trailing portion - // (optional container uuid). - sp := strings.Split(creds.Tokens[0], "/") - if len(sp) >= 3 { - containerRequest["runtime_token"] = strings.Join(sp[0:3], "/") - } else { - containerRequest["runtime_token"] = creds.Tokens[0] - } - } - } - - newbody, err := json.Marshal(request) - buf := bytes.NewBuffer(newbody) - req.Body = ioutil.NopCloser(buf) - req.ContentLength = int64(buf.Len()) - req.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len())) - - resp, err := h.handler.remoteClusterRequest(*clusterID, req) - h.handler.proxy.ForwardResponse(w, resp, err) - return true -} diff --git a/lib/controller/federation.go b/lib/controller/federation.go index 419d8b0104..144d41c21b 100644 --- a/lib/controller/federation.go +++ b/lib/controller/federation.go @@ -94,20 +94,12 @@ func (h *Handler) setupProxyRemoteCluster(next http.Handler) http.Handler { wfHandler := &genericFederatedRequestHandler{next, h, wfRe, nil} containersHandler := &genericFederatedRequestHandler{next, h, containersRe, nil} - containerRequestsHandler := &genericFederatedRequestHandler{next, h, containerRequestsRe, - []federatedRequestDelegate{remoteContainerRequestCreate}} - collectionsRequestsHandler := &genericFederatedRequestHandler{next, h, collectionsRe, - []federatedRequestDelegate{fetchRemoteCollectionByUUID, fetchRemoteCollectionByPDH}} linksRequestsHandler := &genericFederatedRequestHandler{next, h, linksRe, nil} mux.Handle("/arvados/v1/workflows", wfHandler) mux.Handle("/arvados/v1/workflows/", wfHandler) mux.Handle("/arvados/v1/containers", containersHandler) mux.Handle("/arvados/v1/containers/", containersHandler) - mux.Handle("/arvados/v1/container_requests", containerRequestsHandler) - mux.Handle("/arvados/v1/container_requests/", containerRequestsHandler) - mux.Handle("/arvados/v1/collections", collectionsRequestsHandler) - mux.Handle("/arvados/v1/collections/", collectionsRequestsHandler) mux.Handle("/arvados/v1/links", linksRequestsHandler) mux.Handle("/arvados/v1/links/", linksRequestsHandler) mux.Handle("/", next) diff --git a/lib/controller/federation_test.go b/lib/controller/federation_test.go index f4cadd821b..6d74ab65f9 100644 --- a/lib/controller/federation_test.go +++ b/lib/controller/federation_test.go @@ -64,6 +64,9 @@ func (s *FederationSuite) SetUpTest(c *check.C) { cluster.API.MaxItemsPerResponse = 1000 cluster.API.MaxRequestAmplification = 4 cluster.API.RequestTimeout = arvados.Duration(5 * time.Minute) + cluster.Collections.BlobSigning = true + cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey + cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour * 24 * 14) arvadostest.SetServiceURL(&cluster.Services.RailsAPI, "http://localhost:1/") arvadostest.SetServiceURL(&cluster.Services.Controller, "http://localhost:/") s.testHandler = &Handler{Cluster: cluster} @@ -695,7 +698,6 @@ func (s *FederationSuite) TestCreateRemoteContainerRequestCheckRuntimeToken(c *c s.testHandler.Cluster.ClusterID = "zzzzz" s.testHandler.Cluster.SystemRootToken = arvadostest.SystemRootToken s.testHandler.Cluster.API.MaxTokenLifetime = arvados.Duration(time.Hour) - s.testHandler.Cluster.Collections.BlobSigningTTL = arvados.Duration(336 * time.Hour) // For some reason, this was set to 0h resp := s.testRequest(req).Result() c.Check(resp.StatusCode, check.Equals, http.StatusOK) diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go index 26f0dbb0d1..7cad7d5f7a 100644 --- a/lib/controller/integration_test.go +++ b/lib/controller/integration_test.go @@ -20,6 +20,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" "git.arvados.org/arvados.git/lib/boot" "git.arvados.org/arvados.git/lib/config" @@ -149,6 +150,16 @@ func (s *IntegrationSuite) TearDownSuite(c *check.C) { } } +func (s *IntegrationSuite) TestDefaultStorageClassesOnCollections(c *check.C) { + conn := s.testClusters["z1111"].Conn() + rootctx, _, _ := s.testClusters["z1111"].RootClients() + userctx, _, kc, _ := s.testClusters["z1111"].UserClients(rootctx, c, conn, s.oidcprovider.AuthEmail, true) + c.Assert(len(kc.DefaultStorageClasses) > 0, check.Equals, true) + coll, err := conn.CollectionCreate(userctx, arvados.CreateOptions{}) + c.Assert(err, check.IsNil) + c.Assert(coll.StorageClassesDesired, check.DeepEquals, kc.DefaultStorageClasses) +} + func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) { conn1 := s.testClusters["z1111"].Conn() rootctx1, _, _ := s.testClusters["z1111"].RootClients() @@ -187,6 +198,49 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) { c.Check(coll.PortableDataHash, check.Equals, pdh) } +// Tests bug #18004 +func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) { + conn1 := s.testClusters["z1111"].Conn() + rootctx1, _, _ := s.testClusters["z1111"].RootClients() + rootctx2, _, _ := s.testClusters["z2222"].RootClients() + conn2 := s.testClusters["z2222"].Conn() + userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2@example.com", true) + + var wg1, wg2 sync.WaitGroup + creqs := 100 + + // Make concurrent requests to z2222 with a local token to make sure more + // than one worker is listening. + wg1.Add(1) + for i := 0; i < creqs; i++ { + wg2.Add(1) + go func() { + defer wg2.Done() + wg1.Wait() + _, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{}) + c.Check(err, check.IsNil, check.Commentf("warm up phase failed")) + }() + } + wg1.Done() + wg2.Wait() + + // Real test pass -- use a new remote token than the one used in the warm-up + // phase. + wg1.Add(1) + for i := 0; i < creqs; i++ { + wg2.Add(1) + go func() { + defer wg2.Done() + wg1.Wait() + // Retrieve the remote collection from cluster z2222. + _, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{}) + c.Check(err, check.IsNil, check.Commentf("testing phase failed")) + }() + } + wg1.Done() + wg2.Wait() +} + func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) { if _, err := exec.LookPath("s3cmd"); err != nil { c.Skip("s3cmd not in PATH") @@ -502,7 +556,7 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) { } // We test the direct access to the database -// normally an integration test would not have a database access, but in this case we need +// normally an integration test would not have a database access, but in this case we need // to test tokens that are secret, so there is no API response that will give them back func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) { ctx := context.Background() @@ -608,6 +662,87 @@ func (s *IntegrationSuite) TestIntermediateCluster(c *check.C) { } } +// Test for bug #18076 +func (s *IntegrationSuite) TestStaleCachedUserRecord(c *check.C) { + rootctx1, _, _ := s.testClusters["z1111"].RootClients() + conn1 := s.testClusters["z1111"].Conn() + conn3 := s.testClusters["z3333"].Conn() + + // Make sure LoginCluster is properly configured + for cls := range s.testClusters { + if cls == "z1111" || cls == "z3333" { + c.Check( + s.testClusters[cls].Config.Clusters[cls].Login.LoginCluster, + check.Equals, "z1111", + check.Commentf("incorrect LoginCluster config on cluster %q", cls)) + } + } + + // Create some users, request them on the federated cluster so they're cached. + var users []arvados.User + for userNr := 0; userNr < 2; userNr++ { + _, _, _, user := s.testClusters["z1111"].UserClients( + rootctx1, + c, + conn1, + fmt.Sprintf("user%d@example.com", userNr), + true) + c.Assert(user.Username, check.Not(check.Equals), "") + users = append(users, user) + + lst, err := conn3.UserList(rootctx1, arvados.ListOptions{Limit: -1}) + c.Assert(err, check.Equals, nil) + userFound := false + for _, fedUser := range lst.Items { + if fedUser.UUID == user.UUID { + c.Assert(fedUser.Username, check.Equals, user.Username) + userFound = true + break + } + } + c.Assert(userFound, check.Equals, true) + } + + // Swap the usernames + _, err := conn1.UserUpdate(rootctx1, arvados.UpdateOptions{ + UUID: users[0].UUID, + Attrs: map[string]interface{}{ + "username": "", + }, + }) + c.Assert(err, check.Equals, nil) + _, err = conn1.UserUpdate(rootctx1, arvados.UpdateOptions{ + UUID: users[1].UUID, + Attrs: map[string]interface{}{ + "username": users[0].Username, + }, + }) + c.Assert(err, check.Equals, nil) + _, err = conn1.UserUpdate(rootctx1, arvados.UpdateOptions{ + UUID: users[0].UUID, + Attrs: map[string]interface{}{ + "username": users[1].Username, + }, + }) + c.Assert(err, check.Equals, nil) + + // Re-request the list on the federated cluster & check for updates + lst, err := conn3.UserList(rootctx1, arvados.ListOptions{Limit: -1}) + c.Assert(err, check.Equals, nil) + var user0Found, user1Found bool + for _, user := range lst.Items { + if user.UUID == users[0].UUID { + user0Found = true + c.Assert(user.Username, check.Equals, users[1].Username) + } else if user.UUID == users[1].UUID { + user1Found = true + c.Assert(user.Username, check.Equals, users[0].Username) + } + } + c.Assert(user0Found, check.Equals, true) + c.Assert(user1Found, check.Equals, true) +} + // Test for bug #16263 func (s *IntegrationSuite) TestListUsers(c *check.C) { rootctx1, _, _ := s.testClusters["z1111"].RootClients() @@ -632,6 +767,7 @@ func (s *IntegrationSuite) TestListUsers(c *check.C) { for _, user := range lst.Items { if user.Username == "" { nullUsername = true + break } } c.Assert(nullUsername, check.Equals, true) diff --git a/lib/controller/localdb/collection.go b/lib/controller/localdb/collection.go new file mode 100644 index 0000000000..d81dd812bf --- /dev/null +++ b/lib/controller/localdb/collection.go @@ -0,0 +1,102 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package localdb + +import ( + "context" + "time" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/auth" +) + +// CollectionGet defers to railsProxy for everything except blob +// signatures. +func (conn *Conn) CollectionGet(ctx context.Context, opts arvados.GetOptions) (arvados.Collection, error) { + if len(opts.Select) > 0 { + // We need to know IsTrashed and TrashAt to implement + // signing properly, even if the caller doesn't want + // them. + opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) + } + resp, err := conn.railsProxy.CollectionGet(ctx, opts) + if err != nil { + return resp, err + } + conn.signCollection(ctx, &resp) + return resp, nil +} + +// CollectionList defers to railsProxy for everything except blob +// signatures. +func (conn *Conn) CollectionList(ctx context.Context, opts arvados.ListOptions) (arvados.CollectionList, error) { + if len(opts.Select) > 0 { + // We need to know IsTrashed and TrashAt to implement + // signing properly, even if the caller doesn't want + // them. + opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) + } + resp, err := conn.railsProxy.CollectionList(ctx, opts) + if err != nil { + return resp, err + } + for i := range resp.Items { + conn.signCollection(ctx, &resp.Items[i]) + } + return resp, nil +} + +// CollectionCreate defers to railsProxy for everything except blob +// signatures. +func (conn *Conn) CollectionCreate(ctx context.Context, opts arvados.CreateOptions) (arvados.Collection, error) { + if len(opts.Select) > 0 { + // We need to know IsTrashed and TrashAt to implement + // signing properly, even if the caller doesn't want + // them. + opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) + } + resp, err := conn.railsProxy.CollectionCreate(ctx, opts) + if err != nil { + return resp, err + } + conn.signCollection(ctx, &resp) + return resp, nil +} + +// CollectionUpdate defers to railsProxy for everything except blob +// signatures. +func (conn *Conn) CollectionUpdate(ctx context.Context, opts arvados.UpdateOptions) (arvados.Collection, error) { + if len(opts.Select) > 0 { + // We need to know IsTrashed and TrashAt to implement + // signing properly, even if the caller doesn't want + // them. + opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) + } + resp, err := conn.railsProxy.CollectionUpdate(ctx, opts) + if err != nil { + return resp, err + } + conn.signCollection(ctx, &resp) + return resp, nil +} + +func (conn *Conn) signCollection(ctx context.Context, coll *arvados.Collection) { + if coll.IsTrashed || coll.ManifestText == "" || !conn.cluster.Collections.BlobSigning { + return + } + var token string + if creds, ok := auth.FromContext(ctx); ok && len(creds.Tokens) > 0 { + token = creds.Tokens[0] + } + if token == "" { + return + } + ttl := conn.cluster.Collections.BlobSigningTTL.Duration() + exp := time.Now().Add(ttl) + if coll.TrashAt != nil && !coll.TrashAt.IsZero() && coll.TrashAt.Before(exp) { + exp = *coll.TrashAt + } + coll.ManifestText = arvados.SignManifest(coll.ManifestText, token, exp, ttl, []byte(conn.cluster.Collections.BlobSigningKey)) +} diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go new file mode 100644 index 0000000000..e0de7256a1 --- /dev/null +++ b/lib/controller/localdb/collection_test.go @@ -0,0 +1,142 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package localdb + +import ( + "context" + "regexp" + "strconv" + "time" + + "git.arvados.org/arvados.git/lib/config" + "git.arvados.org/arvados.git/lib/controller/rpc" + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadostest" + "git.arvados.org/arvados.git/sdk/go/auth" + "git.arvados.org/arvados.git/sdk/go/ctxlog" + check "gopkg.in/check.v1" +) + +var _ = check.Suite(&CollectionSuite{}) + +type CollectionSuite struct { + cluster *arvados.Cluster + localdb *Conn + railsSpy *arvadostest.Proxy +} + +func (s *CollectionSuite) TearDownSuite(c *check.C) { + // Undo any changes/additions to the user database so they + // don't affect subsequent tests. + arvadostest.ResetEnv() + c.Check(arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil), check.IsNil) +} + +func (s *CollectionSuite) SetUpTest(c *check.C) { + cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load() + c.Assert(err, check.IsNil) + s.cluster, err = cfg.GetCluster("") + c.Assert(err, check.IsNil) + s.localdb = NewConn(s.cluster) + s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI) + *s.localdb.railsProxy = *rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider) +} + +func (s *CollectionSuite) TearDownTest(c *check.C) { + s.railsSpy.Close() +} + +func (s *CollectionSuite) TestSignatures(c *check.C) { + ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}}) + + resp, err := s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: arvadostest.FooCollection}) + c.Check(err, check.IsNil) + c.Check(resp.ManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3\+A[0-9a-f]+@[0-9a-f]+ 0:.*`) + s.checkSignatureExpiry(c, resp.ManifestText, time.Hour*24*7*2) + + resp, err = s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: arvadostest.FooCollection, Select: []string{"manifest_text"}}) + c.Check(err, check.IsNil) + c.Check(resp.ManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3\+A[0-9a-f]+@[0-9a-f]+ 0:.*`) + + lresp, err := s.localdb.CollectionList(ctx, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}}) + c.Check(err, check.IsNil) + if c.Check(lresp.Items, check.HasLen, 1) { + c.Check(lresp.Items[0].UUID, check.Equals, arvadostest.FooCollection) + c.Check(lresp.Items[0].ManifestText, check.Equals, "") + c.Check(lresp.Items[0].UnsignedManifestText, check.Equals, "") + } + + lresp, err = s.localdb.CollectionList(ctx, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}, Select: []string{"manifest_text"}}) + c.Check(err, check.IsNil) + if c.Check(lresp.Items, check.HasLen, 1) { + c.Check(lresp.Items[0].ManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3\+A[0-9a-f]+@[0-9a-f]+ 0:.*`) + c.Check(lresp.Items[0].UnsignedManifestText, check.Equals, "") + } + + lresp, err = s.localdb.CollectionList(ctx, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}, Select: []string{"unsigned_manifest_text"}}) + c.Check(err, check.IsNil) + if c.Check(lresp.Items, check.HasLen, 1) { + c.Check(lresp.Items[0].ManifestText, check.Equals, "") + c.Check(lresp.Items[0].UnsignedManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3 0:.*`) + } + + // early trash date causes lower signature TTL (even if + // trash_at and is_trashed fields are unselected) + trashed, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{ + Select: []string{"uuid", "manifest_text"}, + Attrs: map[string]interface{}{ + "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n", + "trash_at": time.Now().UTC().Add(time.Hour), + }}) + c.Assert(err, check.IsNil) + s.checkSignatureExpiry(c, trashed.ManifestText, time.Hour) + resp, err = s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: trashed.UUID}) + c.Assert(err, check.IsNil) + s.checkSignatureExpiry(c, resp.ManifestText, time.Hour) + + // distant future trash date does not cause higher signature TTL + trashed, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ + UUID: trashed.UUID, + Attrs: map[string]interface{}{ + "trash_at": time.Now().UTC().Add(time.Hour * 24 * 365), + }}) + c.Assert(err, check.IsNil) + s.checkSignatureExpiry(c, trashed.ManifestText, time.Hour*24*7*2) + resp, err = s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: trashed.UUID}) + c.Assert(err, check.IsNil) + s.checkSignatureExpiry(c, resp.ManifestText, time.Hour*24*7*2) + + // Make sure groups/contents doesn't return manifest_text with + // collections (if it did, we'd need to sign it). + gresp, err := s.localdb.GroupContents(ctx, arvados.GroupContentsOptions{ + Limit: -1, + Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}, + Select: []string{"uuid", "manifest_text"}, + }) + c.Check(err, check.IsNil) + if c.Check(gresp.Items, check.HasLen, 1) { + c.Check(gresp.Items[0].(map[string]interface{})["uuid"], check.Equals, arvadostest.FooCollection) + c.Check(gresp.Items[0].(map[string]interface{})["manifest_text"], check.Equals, nil) + } +} + +func (s *CollectionSuite) checkSignatureExpiry(c *check.C, manifestText string, expectedTTL time.Duration) { + m := regexp.MustCompile(`@([[:xdigit:]]+)`).FindStringSubmatch(manifestText) + c.Assert(m, check.HasLen, 2) + sigexp, err := strconv.ParseInt(m[1], 16, 64) + c.Assert(err, check.IsNil) + expectedExp := time.Now().Add(expectedTTL).Unix() + c.Check(sigexp > expectedExp-60, check.Equals, true) + c.Check(sigexp <= expectedExp, check.Equals, true) +} + +func (s *CollectionSuite) TestSignaturesDisabled(c *check.C) { + s.localdb.cluster.Collections.BlobSigning = false + ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}}) + + resp, err := s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: arvadostest.FooCollection}) + c.Check(err, check.IsNil) + c.Check(resp.ManifestText, check.Matches, `(?ms).* acbd[^ +]*\+3 0:.*`) +} diff --git a/lib/controller/server_test.go b/lib/controller/server_test.go index e3558c3f41..051f355716 100644 --- a/lib/controller/server_test.go +++ b/lib/controller/server_test.go @@ -9,6 +9,7 @@ import ( "net/http" "os" "path/filepath" + "time" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/arvadostest" @@ -39,6 +40,9 @@ func newServerFromIntegrationTestEnv(c *check.C) *httpserver.Server { PostgreSQL: integrationTestCluster().PostgreSQL, }} handler.Cluster.TLS.Insecure = true + handler.Cluster.Collections.BlobSigning = true + handler.Cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey + handler.Cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour * 24 * 14) arvadostest.SetServiceURL(&handler.Cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST")) arvadostest.SetServiceURL(&handler.Cluster.Services.Controller, "http://localhost:/") diff --git a/lib/costanalyzer/costanalyzer_test.go b/lib/costanalyzer/costanalyzer_test.go index 9fee66e1dd..2975e3b3de 100644 --- a/lib/costanalyzer/costanalyzer_test.go +++ b/lib/costanalyzer/costanalyzer_test.go @@ -33,7 +33,6 @@ func (s *Suite) TearDownSuite(c *check.C) { } func (s *Suite) SetUpSuite(c *check.C) { - arvadostest.StartAPI() arvadostest.StartKeep(2, true) // Get the various arvados, arvadosclient, and keep client objects diff --git a/lib/crunchrun/singularity.go b/lib/crunchrun/singularity.go index 741f542454..61fecad0a1 100644 --- a/lib/crunchrun/singularity.go +++ b/lib/crunchrun/singularity.go @@ -101,7 +101,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar if len(cl.Items) == 1 { imageCollection = cl.Items[0] } else { - collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339) + collectionName := "converting " + collectionName exp := time.Now().Add(24 * 7 * 2 * time.Hour) err = containerClient.RequestAndDecode(&imageCollection, arvados.EndpointCollectionCreate.Method, @@ -112,6 +112,7 @@ func (e *singularityExecutor) checkImageCache(dockerImageID string, container ar "name": collectionName, "trash_at": exp.UTC().Format(time.RFC3339), }, + "ensure_unique_name": true, }) if err != nil { return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err) @@ -141,6 +142,12 @@ func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath s } if _, err := os.Stat(imageFilename); os.IsNotExist(err) { + // Make sure the docker image is readable, and error + // out if not. + if _, err := os.Stat(imageTarballPath); err != nil { + return err + } + e.logf("building singularity image") // "singularity build" does not accept a // docker-archive://... filename containing a ":" character, diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go index 7cf6df6431..96151fa8da 100644 --- a/lib/lsf/dispatch_test.go +++ b/lib/lsf/dispatch_test.go @@ -53,7 +53,7 @@ type lsfstub struct { errorRate float64 } -func (stub lsfstub) stubCommand(c *check.C) func(prog string, args ...string) *exec.Cmd { +func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...string) *exec.Cmd { mtx := sync.Mutex{} nextjobid := 100 fakejobq := map[int]string{} @@ -71,7 +71,11 @@ func (stub lsfstub) stubCommand(c *check.C) func(prog string, args ...string) *e } switch prog { case "bsub": - c.Assert(args, check.HasLen, 4) + defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList + c.Assert(args, check.HasLen, 4+len(defaultArgs)) + c.Check(args[:len(defaultArgs)], check.DeepEquals, defaultArgs) + args = args[len(defaultArgs):] + c.Check(args[0], check.Equals, "-J") switch args[1] { case arvadostest.LockedContainerUUID: @@ -124,7 +128,7 @@ func (s *suite) TestSubmit(c *check.C) { s.disp.lsfcli.stubCommand = lsfstub{ errorRate: 0.1, sudoUser: s.disp.Cluster.Containers.LSF.BsubSudoUser, - }.stubCommand(c) + }.stubCommand(s, c) s.disp.Start() deadline := time.Now().Add(20 * time.Second) for range time.NewTicker(time.Second).C { diff --git a/lib/recovercollection/cmd_test.go b/lib/recovercollection/cmd_test.go index 7b3c8e1b4e..f891e55677 100644 --- a/lib/recovercollection/cmd_test.go +++ b/lib/recovercollection/cmd_test.go @@ -27,7 +27,6 @@ var _ = check.Suite(&Suite{}) type Suite struct{} func (*Suite) SetUpSuite(c *check.C) { - arvadostest.StartAPI() arvadostest.StartKeep(2, true) } diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go index a57f2a6838..736ace75e7 100644 --- a/sdk/go/arvados/api.go +++ b/sdk/go/arvados/api.go @@ -133,6 +133,7 @@ type CreateOptions struct { type UpdateOptions struct { UUID string `json:"uuid"` Attrs map[string]interface{} `json:"attrs"` + Select []string `json:"select"` BypassFederation bool `json:"bypass_federation"` } diff --git a/sdk/go/arvadosclient/arvadosclient.go b/sdk/go/arvadosclient/arvadosclient.go index 4c594625e7..8f902d3a09 100644 --- a/sdk/go/arvadosclient/arvadosclient.go +++ b/sdk/go/arvadosclient/arvadosclient.go @@ -146,7 +146,7 @@ func MakeTLSConfig(insecure bool) *tls.Config { data, err := ioutil.ReadFile(file) if err != nil { if !os.IsNotExist(err) { - log.Printf("error reading %q: %s", file, err) + log.Printf("proceeding without loading cert file %q: %s", file, err) } continue } @@ -426,6 +426,24 @@ func (c *ArvadosClient) Discovery(parameter string) (value interface{}, err erro return value, ErrInvalidArgument } +// ClusterConfig returns the value of the given key in the current cluster's +// exported config. If key is an empty string, it'll return the entire config. +func (c *ArvadosClient) ClusterConfig(key string) (config interface{}, err error) { + var clusterConfig interface{} + err = c.Call("GET", "config", "", "", nil, &clusterConfig) + if err != nil { + return nil, err + } + if key == "" { + return clusterConfig, nil + } + configData, ok := clusterConfig.(map[string]interface{})[key] + if !ok { + return nil, ErrInvalidArgument + } + return configData, nil +} + func (c *ArvadosClient) httpClient() *http.Client { if c.Client != nil { return c.Client diff --git a/sdk/go/arvadosclient/arvadosclient_test.go b/sdk/go/arvadosclient/arvadosclient_test.go index 9d6e4fe7e8..27e23c1aea 100644 --- a/sdk/go/arvadosclient/arvadosclient_test.go +++ b/sdk/go/arvadosclient/arvadosclient_test.go @@ -158,6 +158,36 @@ func (s *ServerRequiredSuite) TestAPIDiscovery_Get_noSuchParameter(c *C) { c.Assert(value, IsNil) } +func (s *ServerRequiredSuite) TestAPIClusterConfig_Get_StorageClasses(c *C) { + arv, err := MakeArvadosClient() + c.Assert(err, IsNil) + data, err := arv.ClusterConfig("StorageClasses") + c.Assert(err, IsNil) + c.Assert(data, NotNil) + clusterConfig := data.(map[string]interface{}) + _, ok := clusterConfig["default"] + c.Assert(ok, Equals, true) +} + +func (s *ServerRequiredSuite) TestAPIClusterConfig_Get_All(c *C) { + arv, err := MakeArvadosClient() + c.Assert(err, IsNil) + data, err := arv.ClusterConfig("") + c.Assert(err, IsNil) + c.Assert(data, NotNil) + clusterConfig := data.(map[string]interface{}) + _, ok := clusterConfig["StorageClasses"] + c.Assert(ok, Equals, true) +} + +func (s *ServerRequiredSuite) TestAPIClusterConfig_Get_noSuchSection(c *C) { + arv, err := MakeArvadosClient() + c.Assert(err, IsNil) + data, err := arv.ClusterConfig("noSuchSection") + c.Assert(err, NotNil) + c.Assert(data, IsNil) +} + func (s *ServerRequiredSuite) TestCreateLarge(c *C) { arv, err := MakeArvadosClient() c.Assert(err, IsNil) diff --git a/sdk/go/arvadostest/run_servers.go b/sdk/go/arvadostest/run_servers.go index 5b01db5c4b..8f70c5ee26 100644 --- a/sdk/go/arvadostest/run_servers.go +++ b/sdk/go/arvadostest/run_servers.go @@ -5,53 +5,40 @@ package arvadostest import ( - "bufio" - "bytes" + "crypto/tls" "fmt" "io/ioutil" "log" + "net/http" "os" "os/exec" "path" "strconv" "strings" + + "gopkg.in/check.v1" ) var authSettings = make(map[string]string) -// ResetEnv resets test env +// ResetEnv resets ARVADOS_* env vars to whatever they were the first +// time this func was called. +// +// Call it from your SetUpTest or SetUpSuite func if your tests modify +// env vars. func ResetEnv() { - for k, v := range authSettings { - os.Setenv(k, v) - } -} - -// APIHost returns the address:port of the current test server. -func APIHost() string { - h := authSettings["ARVADOS_API_HOST"] - if h == "" { - log.Fatal("arvadostest.APIHost() was called but authSettings is not populated") - } - return h -} - -// ParseAuthSettings parses auth settings from given input -func ParseAuthSettings(authScript []byte) { - scanner := bufio.NewScanner(bytes.NewReader(authScript)) - for scanner.Scan() { - line := scanner.Text() - if 0 != strings.Index(line, "export ") { - log.Printf("Ignoring: %v", line) - continue + if len(authSettings) == 0 { + for _, e := range os.Environ() { + e := strings.SplitN(e, "=", 2) + if len(e) == 2 { + authSettings[e[0]] = e[1] + } } - toks := strings.SplitN(strings.Replace(line, "export ", "", 1), "=", 2) - if len(toks) == 2 { - authSettings[toks[0]] = toks[1] - } else { - log.Fatalf("Could not parse: %v", line) + } else { + for k, v := range authSettings { + os.Setenv(k, v) } } - log.Printf("authSettings: %v", authSettings) } var pythonTestDir string @@ -80,34 +67,17 @@ func chdirToPythonTests() { } } -// StartAPI starts test API server -func StartAPI() { - cwd, _ := os.Getwd() - defer os.Chdir(cwd) - chdirToPythonTests() - - cmd := exec.Command("python", "run_test_server.py", "start", "--auth", "admin") - cmd.Stdin = nil - cmd.Stderr = os.Stderr - - authScript, err := cmd.Output() - if err != nil { - log.Fatalf("%+v: %s", cmd.Args, err) - } - ParseAuthSettings(authScript) - ResetEnv() -} - -// StopAPI stops test API server -func StopAPI() { - cwd, _ := os.Getwd() - defer os.Chdir(cwd) - chdirToPythonTests() - - cmd := exec.Command("python", "run_test_server.py", "stop") - bgRun(cmd) - // Without Wait, "go test" in go1.10.1 tends to hang. https://github.com/golang/go/issues/24050 - cmd.Wait() +func ResetDB(c *check.C) { + hc := http.Client{Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }} + req, err := http.NewRequest("POST", "https://"+os.Getenv("ARVADOS_TEST_API_HOST")+"/database/reset", nil) + c.Assert(err, check.IsNil) + req.Header.Set("Authorization", "Bearer "+AdminToken) + resp, err := hc.Do(req) + c.Assert(err, check.IsNil) + defer resp.Body.Close() + c.Check(resp.StatusCode, check.Equals, http.StatusOK) } // StartKeep starts the given number of keep servers, diff --git a/sdk/go/dispatch/dispatch_test.go b/sdk/go/dispatch/dispatch_test.go index 4b115229b4..2a9d84639e 100644 --- a/sdk/go/dispatch/dispatch_test.go +++ b/sdk/go/dispatch/dispatch_test.go @@ -18,14 +18,6 @@ var _ = Suite(&suite{}) type suite struct{} -func (s *suite) SetUpSuite(c *C) { - arvadostest.StartAPI() -} - -func (s *suite) TearDownSuite(c *C) { - arvadostest.StopAPI() -} - func (s *suite) TestTrackContainer(c *C) { arv, err := arvadosclient.MakeArvadosClient() c.Assert(err, Equals, nil) diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go index 3bc6f4afcd..68ac886ddd 100644 --- a/sdk/go/keepclient/keepclient.go +++ b/sdk/go/keepclient/keepclient.go @@ -97,17 +97,18 @@ type HTTPClient interface { // KeepClient holds information about Arvados and Keep servers. type KeepClient struct { - Arvados *arvadosclient.ArvadosClient - Want_replicas int - localRoots map[string]string - writableLocalRoots map[string]string - gatewayRoots map[string]string - lock sync.RWMutex - HTTPClient HTTPClient - Retries int - BlockCache *BlockCache - RequestID string - StorageClasses []string + Arvados *arvadosclient.ArvadosClient + Want_replicas int + localRoots map[string]string + writableLocalRoots map[string]string + gatewayRoots map[string]string + lock sync.RWMutex + HTTPClient HTTPClient + Retries int + BlockCache *BlockCache + RequestID string + StorageClasses []string + DefaultStorageClasses []string // Set by cluster's exported config // set to 1 if all writable services are of disk type, otherwise 0 replicasPerService int @@ -119,7 +120,23 @@ type KeepClient struct { disableDiscovery bool } -// MakeKeepClient creates a new KeepClient, calls +func (kc *KeepClient) loadDefaultClasses() error { + scData, err := kc.Arvados.ClusterConfig("StorageClasses") + if err != nil { + return err + } + classes := scData.(map[string]interface{}) + for scName := range classes { + scConf, _ := classes[scName].(map[string]interface{}) + isDefault, ok := scConf["Default"].(bool) + if ok && isDefault { + kc.DefaultStorageClasses = append(kc.DefaultStorageClasses, scName) + } + } + return nil +} + +// MakeKeepClient creates a new KeepClient, loads default storage classes, calls // DiscoverKeepServices(), and returns when the client is ready to // use. func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) { @@ -138,11 +155,16 @@ func New(arv *arvadosclient.ArvadosClient) *KeepClient { defaultReplicationLevel = int(v) } } - return &KeepClient{ + kc := &KeepClient{ Arvados: arv, Want_replicas: defaultReplicationLevel, Retries: 2, } + err = kc.loadDefaultClasses() + if err != nil { + DebugPrintf("DEBUG: Unable to load the default storage classes cluster config") + } + return kc } // PutHR puts a block given the block hash, a reader, and the number of bytes diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go index 62268fa463..a6e0a11d51 100644 --- a/sdk/go/keepclient/keepclient_test.go +++ b/sdk/go/keepclient/keepclient_test.go @@ -24,7 +24,6 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/arvadostest" . "gopkg.in/check.v1" - check "gopkg.in/check.v1" ) // Gocheck boilerplate @@ -52,13 +51,11 @@ func pythonDir() string { } func (s *ServerRequiredSuite) SetUpSuite(c *C) { - arvadostest.StartAPI() arvadostest.StartKeep(2, false) } func (s *ServerRequiredSuite) TearDownSuite(c *C) { arvadostest.StopKeep(2) - arvadostest.StopAPI() } func (s *ServerRequiredSuite) SetUpTest(c *C) { @@ -78,9 +75,22 @@ func (s *ServerRequiredSuite) TestMakeKeepClient(c *C) { } } +func (s *ServerRequiredSuite) TestDefaultStorageClasses(c *C) { + arv, err := arvadosclient.MakeArvadosClient() + c.Assert(err, IsNil) + + cc, err := arv.ClusterConfig("StorageClasses") + c.Assert(err, IsNil) + c.Assert(cc, NotNil) + c.Assert(cc.(map[string]interface{})["default"], NotNil) + + kc := New(arv) + c.Assert(kc.DefaultStorageClasses, DeepEquals, []string{"default"}) +} + func (s *ServerRequiredSuite) TestDefaultReplications(c *C) { arv, err := arvadosclient.MakeArvadosClient() - c.Assert(err, Equals, nil) + c.Assert(err, IsNil) kc, err := MakeKeepClient(arv) c.Check(err, IsNil) @@ -135,7 +145,7 @@ func RunFakeKeepServer(st http.Handler) (ks KeepServer) { // bind to 0.0.0.0 or [::] which is not a valid address for Dial() ks.listener, err = net.ListenTCP("tcp", &net.TCPAddr{IP: []byte{127, 0, 0, 1}, Port: 0}) if err != nil { - panic(fmt.Sprintf("Could not listen on any port")) + panic("Could not listen on any port") } ks.url = fmt.Sprintf("http://%s", ks.listener.Addr().String()) go http.Serve(ks.listener, st) @@ -242,28 +252,107 @@ func (s *StandaloneSuite) TestUploadWithStorageClasses(c *C) { } } -func (s *StandaloneSuite) TestPutWithStorageClasses(c *C) { +func (s *StandaloneSuite) TestPutWithoutStorageClassesClusterSupport(c *C) { nServers := 5 for _, trial := range []struct { replicas int clientClasses []string - putClasses []string // putClasses takes precedence over clientClasses + putClasses []string minRequests int maxRequests int success bool }{ + // Talking to an older cluster (no default storage classes exported + // config) and no other additional storage classes requirements. + {1, nil, nil, 1, 1, true}, + {2, nil, nil, 2, 2, true}, + {3, nil, nil, 3, 3, true}, + {nServers*2 + 1, nil, nil, nServers, nServers, false}, + {1, []string{"class1"}, nil, 1, 1, true}, - {2, []string{"class1"}, nil, 1, 2, true}, - {3, []string{"class1"}, nil, 2, 3, true}, + {2, []string{"class1"}, nil, 2, 2, true}, + {3, []string{"class1"}, nil, 3, 3, true}, {1, []string{"class1", "class2"}, nil, 1, 1, true}, - {3, nil, []string{"class1"}, 2, 3, true}, - {1, nil, []string{"class1", "class2"}, 1, 1, true}, - {1, []string{"class404"}, []string{"class1", "class2"}, 1, 1, true}, - {1, []string{"class1"}, []string{"class404", "class2"}, nServers, nServers, false}, {nServers*2 + 1, []string{"class1"}, nil, nServers, nServers, false}, - {1, []string{"class404"}, nil, nServers, nServers, false}, - {1, []string{"class1", "class404"}, nil, nServers, nServers, false}, - {1, nil, []string{"class1", "class404"}, nServers, nServers, false}, + + {1, nil, []string{"class1"}, 1, 1, true}, + {2, nil, []string{"class1"}, 2, 2, true}, + {3, nil, []string{"class1"}, 3, 3, true}, + {1, nil, []string{"class1", "class2"}, 1, 1, true}, + {nServers*2 + 1, nil, []string{"class1"}, nServers, nServers, false}, + } { + c.Logf("%+v", trial) + st := &StubPutHandler{ + c: c, + expectPath: "acbd18db4cc2f85cedef654fccc4a4d8", + expectAPIToken: "abc123", + expectBody: "foo", + expectStorageClass: "*", + returnStorageClasses: "", // Simulate old cluster without SC keep support + handled: make(chan string, 100), + } + ks := RunSomeFakeKeepServers(st, nServers) + arv, _ := arvadosclient.MakeArvadosClient() + kc, _ := MakeKeepClient(arv) + kc.Want_replicas = trial.replicas + kc.StorageClasses = trial.clientClasses + kc.DefaultStorageClasses = nil // Simulate an old cluster without SC defaults + arv.ApiToken = "abc123" + localRoots := make(map[string]string) + writableLocalRoots := make(map[string]string) + for i, k := range ks { + localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url + writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url + defer k.listener.Close() + } + kc.SetServiceRoots(localRoots, writableLocalRoots, nil) + + _, err := kc.BlockWrite(context.Background(), arvados.BlockWriteOptions{ + Data: []byte("foo"), + StorageClasses: trial.putClasses, + }) + if trial.success { + c.Check(err, IsNil) + } else { + c.Check(err, NotNil) + } + c.Check(len(st.handled) >= trial.minRequests, Equals, true, Commentf("len(st.handled)==%d, trial.minRequests==%d", len(st.handled), trial.minRequests)) + c.Check(len(st.handled) <= trial.maxRequests, Equals, true, Commentf("len(st.handled)==%d, trial.maxRequests==%d", len(st.handled), trial.maxRequests)) + if trial.clientClasses == nil && trial.putClasses == nil { + c.Check(st.requests[0].Header.Get("X-Keep-Storage-Classes"), Equals, "") + } + } +} + +func (s *StandaloneSuite) TestPutWithStorageClasses(c *C) { + nServers := 5 + for _, trial := range []struct { + replicas int + defaultClasses []string + clientClasses []string // clientClasses takes precedence over defaultClasses + putClasses []string // putClasses takes precedence over clientClasses + minRequests int + maxRequests int + success bool + }{ + {1, []string{"class1"}, nil, nil, 1, 1, true}, + {2, []string{"class1"}, nil, nil, 1, 2, true}, + {3, []string{"class1"}, nil, nil, 2, 3, true}, + {1, []string{"class1", "class2"}, nil, nil, 1, 1, true}, + + // defaultClasses doesn't matter when any of the others is specified. + {1, []string{"class1"}, []string{"class1"}, nil, 1, 1, true}, + {2, []string{"class1"}, []string{"class1"}, nil, 1, 2, true}, + {3, []string{"class1"}, []string{"class1"}, nil, 2, 3, true}, + {1, []string{"class1"}, []string{"class1", "class2"}, nil, 1, 1, true}, + {3, []string{"class1"}, nil, []string{"class1"}, 2, 3, true}, + {1, []string{"class1"}, nil, []string{"class1", "class2"}, 1, 1, true}, + {1, []string{"class1"}, []string{"class404"}, []string{"class1", "class2"}, 1, 1, true}, + {1, []string{"class1"}, []string{"class1"}, []string{"class404", "class2"}, nServers, nServers, false}, + {nServers*2 + 1, []string{}, []string{"class1"}, nil, nServers, nServers, false}, + {1, []string{"class1"}, []string{"class404"}, nil, nServers, nServers, false}, + {1, []string{"class1"}, []string{"class1", "class404"}, nil, nServers, nServers, false}, + {1, []string{"class1"}, nil, []string{"class1", "class404"}, nServers, nServers, false}, } { c.Logf("%+v", trial) st := &StubPutHandler{ @@ -280,6 +369,7 @@ func (s *StandaloneSuite) TestPutWithStorageClasses(c *C) { kc, _ := MakeKeepClient(arv) kc.Want_replicas = trial.replicas kc.StorageClasses = trial.clientClasses + kc.DefaultStorageClasses = trial.defaultClasses arv.ApiToken = "abc123" localRoots := make(map[string]string) writableLocalRoots := make(map[string]string) @@ -295,17 +385,17 @@ func (s *StandaloneSuite) TestPutWithStorageClasses(c *C) { StorageClasses: trial.putClasses, }) if trial.success { - c.Check(err, check.IsNil) + c.Check(err, IsNil) } else { - c.Check(err, check.NotNil) + c.Check(err, NotNil) } - c.Check(len(st.handled) >= trial.minRequests, check.Equals, true, check.Commentf("len(st.handled)==%d, trial.minRequests==%d", len(st.handled), trial.minRequests)) - c.Check(len(st.handled) <= trial.maxRequests, check.Equals, true, check.Commentf("len(st.handled)==%d, trial.maxRequests==%d", len(st.handled), trial.maxRequests)) - if !trial.success && trial.replicas == 1 && c.Check(len(st.requests) >= 2, check.Equals, true) { + c.Check(len(st.handled) >= trial.minRequests, Equals, true, Commentf("len(st.handled)==%d, trial.minRequests==%d", len(st.handled), trial.minRequests)) + c.Check(len(st.handled) <= trial.maxRequests, Equals, true, Commentf("len(st.handled)==%d, trial.maxRequests==%d", len(st.handled), trial.maxRequests)) + if !trial.success && trial.replicas == 1 && c.Check(len(st.requests) >= 2, Equals, true) { // Max concurrency should be 1. First request // should have succeeded for class1. Second // request should only ask for class404. - c.Check(st.requests[1].Header.Get("X-Keep-Storage-Classes"), check.Equals, "class404") + c.Check(st.requests[1].Header.Get("X-Keep-Storage-Classes"), Equals, "class404") } } } @@ -392,7 +482,7 @@ func (s *StandaloneSuite) TestPutB(c *C) { expectPath: hash, expectAPIToken: "abc123", expectBody: "foo", - expectStorageClass: "", + expectStorageClass: "default", returnStorageClasses: "", handled: make(chan string, 5), } @@ -436,7 +526,7 @@ func (s *StandaloneSuite) TestPutHR(c *C) { expectPath: hash, expectAPIToken: "abc123", expectBody: "foo", - expectStorageClass: "", + expectStorageClass: "default", returnStorageClasses: "", handled: make(chan string, 5), } @@ -487,7 +577,7 @@ func (s *StandaloneSuite) TestPutWithFail(c *C) { expectPath: hash, expectAPIToken: "abc123", expectBody: "foo", - expectStorageClass: "", + expectStorageClass: "default", returnStorageClasses: "", handled: make(chan string, 4), } @@ -549,7 +639,7 @@ func (s *StandaloneSuite) TestPutWithTooManyFail(c *C) { expectPath: hash, expectAPIToken: "abc123", expectBody: "foo", - expectStorageClass: "", + expectStorageClass: "default", returnStorageClasses: "", handled: make(chan string, 1), } @@ -1159,7 +1249,7 @@ func (s *StandaloneSuite) TestPutBWant2ReplicasWithOnlyOneWritableLocalRoot(c *C expectPath: hash, expectAPIToken: "abc123", expectBody: "foo", - expectStorageClass: "", + expectStorageClass: "default", returnStorageClasses: "", handled: make(chan string, 5), } @@ -1378,7 +1468,7 @@ func (s *StandaloneSuite) TestPutBRetry(c *C) { expectPath: Md5String("foo"), expectAPIToken: "abc123", expectBody: "foo", - expectStorageClass: "", + expectStorageClass: "default", returnStorageClasses: "", handled: make(chan string, 5), }, diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go index 633ec18968..8d299815b2 100644 --- a/sdk/go/keepclient/support.go +++ b/sdk/go/keepclient/support.go @@ -164,7 +164,11 @@ func (kc *KeepClient) BlockWrite(ctx context.Context, req arvados.BlockWriteOpti req.Hash = fmt.Sprintf("%x", m.Sum(nil)) } if req.StorageClasses == nil { - req.StorageClasses = kc.StorageClasses + if len(kc.StorageClasses) > 0 { + req.StorageClasses = kc.StorageClasses + } else { + req.StorageClasses = kc.DefaultStorageClasses + } } if req.Replicas == 0 { req.Replicas = kc.Want_replicas diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 44d0325353..50cb703a56 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -1296,8 +1296,8 @@ class Collection(RichCollectionBase): :storage_classes_desired: A list of storage class names where to upload the data. If None, - the keepstores are expected to store the data into their default - storage class. + the keep client is expected to store the data into the cluster's + default storage class(es). """ diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py index 86b1d91b82..9dfe0436de 100644 --- a/sdk/python/arvados/keep.py +++ b/sdk/python/arvados/keep.py @@ -842,6 +842,7 @@ class KeepClient(object): self.hits_counter = Counter() self.misses_counter = Counter() self._storage_classes_unsupported_warning = False + self._default_classes = [] if local_store: self.local_store = local_store @@ -882,6 +883,12 @@ class KeepClient(object): self._writable_services = None self.using_proxy = None self._static_services_list = False + try: + self._default_classes = [ + k for k, v in self.api_client.config()['StorageClasses'].items() if v['Default']] + except KeyError: + # We're talking to an old cluster + pass def current_timeout(self, attempt_number): """Return the appropriate timeout to use for this client. @@ -1174,7 +1181,7 @@ class KeepClient(object): "failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service") @retry.retry_method - def put(self, data, copies=2, num_retries=None, request_id=None, classes=[]): + def put(self, data, copies=2, num_retries=None, request_id=None, classes=None): """Save data in Keep. This method will get a list of Keep services from the API server, and @@ -1195,6 +1202,8 @@ class KeepClient(object): be written. """ + classes = classes or self._default_classes + if not isinstance(data, bytes): data = data.encode() diff --git a/sdk/python/tests/arvados_testutil.py b/sdk/python/tests/arvados_testutil.py index f251ea654b..d9b3ca86c4 100644 --- a/sdk/python/tests/arvados_testutil.py +++ b/sdk/python/tests/arvados_testutil.py @@ -190,7 +190,13 @@ class MockStreamReader(object): class ApiClientMock(object): def api_client_mock(self): - return mock.MagicMock(name='api_client_mock') + api_mock = mock.MagicMock(name='api_client_mock') + api_mock.config.return_value = { + 'StorageClasses': { + 'default': {'Default': True} + } + } + return api_mock def mock_keep_services(self, api_mock=None, status=200, count=12, service_type='disk', diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py index 0eefa586d9..b1c42fd2b3 100644 --- a/sdk/python/tests/test_keep_client.py +++ b/sdk/python/tests/test_keep_client.py @@ -540,26 +540,49 @@ class KeepStorageClassesTestCase(unittest.TestCase, tutil.ApiClientMock): self.data = b'xyzzy' self.locator = '1271ed5ef305aadabc605b1609e24c52' + def test_multiple_default_storage_classes_req_header(self): + api_mock = self.api_client_mock() + api_mock.config.return_value = { + 'StorageClasses': { + 'foo': { 'Default': True }, + 'bar': { 'Default': True }, + 'baz': { 'Default': False } + } + } + api_client = self.mock_keep_services(api_mock=api_mock, count=2) + keep_client = arvados.KeepClient(api_client=api_client) + resp_hdr = { + 'x-keep-storage-classes-confirmed': 'foo=1, bar=1', + 'x-keep-replicas-stored': 1 + } + with tutil.mock_keep_responses(self.locator, 200, **resp_hdr) as mock: + keep_client.put(self.data, copies=1) + req_hdr = mock.responses[0] + self.assertIn( + 'X-Keep-Storage-Classes: bar, foo', req_hdr.getopt(pycurl.HTTPHEADER)) + def test_storage_classes_req_header(self): + self.assertEqual( + self.api_client.config()['StorageClasses'], + {'default': {'Default': True}}) cases = [ # requested, expected [['foo'], 'X-Keep-Storage-Classes: foo'], [['bar', 'foo'], 'X-Keep-Storage-Classes: bar, foo'], - [[], None], + [[], 'X-Keep-Storage-Classes: default'], + [None, 'X-Keep-Storage-Classes: default'], ] for req_classes, expected_header in cases: headers = {'x-keep-replicas-stored': 1} - if len(req_classes) > 0: + if req_classes is None or len(req_classes) == 0: + confirmed_hdr = 'default=1' + elif len(req_classes) > 0: confirmed_hdr = ', '.join(["{}=1".format(cls) for cls in req_classes]) - headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr}) + headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr}) with tutil.mock_keep_responses(self.locator, 200, **headers) as mock: self.keep_client.put(self.data, copies=1, classes=req_classes) - resp = mock.responses[0] - if expected_header is not None: - self.assertIn(expected_header, resp.getopt(pycurl.HTTPHEADER)) - else: - for hdr in resp.getopt(pycurl.HTTPHEADER): - self.assertNotRegex(hdr, r'^X-Keep-Storage-Classes.*') + req_hdr = mock.responses[0] + self.assertIn(expected_header, req_hdr.getopt(pycurl.HTTPHEADER)) def test_partial_storage_classes_put(self): headers = { @@ -1368,6 +1391,8 @@ class KeepClientAPIErrorTest(unittest.TestCase): return "abc" elif r == "insecure": return False + elif r == "config": + return lambda: {} else: raise arvados.errors.KeepReadError() keep_client = arvados.KeepClient(api_client=ApiMock(), diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb index 440ac64016..13b0261d94 100644 --- a/services/api/app/controllers/arvados/v1/collections_controller.rb +++ b/services/api/app/controllers/arvados/v1/collections_controller.rb @@ -69,12 +69,12 @@ class Arvados::V1::CollectionsController < ApplicationController include_old_versions: params[:include_old_versions], } - # It matters which Collection object we pick because we use it to get signed_manifest_text, - # the value of which is affected by the value of trash_at. + # It matters which Collection object we pick because blob + # signatures depend on the value of trash_at. # - # From postgres doc: "By default, null values sort as if larger than any non-null - # value; that is, NULLS FIRST is the default for DESC order, and - # NULLS LAST otherwise." + # From postgres doc: "By default, null values sort as if larger + # than any non-null value; that is, NULLS FIRST is the default + # for DESC order, and NULLS LAST otherwise." # # "trash_at desc" sorts null first, then latest to earliest, so # it will select the Collection object with the longest @@ -84,7 +84,7 @@ class Arvados::V1::CollectionsController < ApplicationController @object = { uuid: c.portable_data_hash, portable_data_hash: c.portable_data_hash, - manifest_text: c.signed_manifest_text, + manifest_text: c.manifest_text, } end else diff --git a/services/api/app/controllers/arvados/v1/users_controller.rb b/services/api/app/controllers/arvados/v1/users_controller.rb index f4d42edf6c..82594c1eb3 100644 --- a/services/api/app/controllers/arvados/v1/users_controller.rb +++ b/services/api/app/controllers/arvados/v1/users_controller.rb @@ -29,7 +29,22 @@ class Arvados::V1::UsersController < ApplicationController end end if needupdate.length > 0 - u.update_attributes!(needupdate) + begin + u.update_attributes!(needupdate) + rescue ActiveRecord::RecordInvalid + loginCluster = Rails.configuration.Login.LoginCluster + if u.uuid[0..4] == loginCluster && !needupdate[:username].nil? + local_user = User.find_by_username(needupdate[:username]) + # A cached user record from the LoginCluster is stale, reset its username + # and retry the update operation. + if local_user.andand.uuid[0..4] == loginCluster && local_user.uuid != u.uuid + Rails.logger.warn("cached username '#{needupdate[:username]}' collision with user '#{local_user.uuid}' - resetting") + local_user.update_attributes!({username: nil}) + retry + end + end + raise # Not the issue we're handling above + end end @objects << u end diff --git a/services/api/app/models/api_client_authorization.rb b/services/api/app/models/api_client_authorization.rb index 52f2cee064..7c7ed759c6 100644 --- a/services/api/app/models/api_client_authorization.rb +++ b/services/api/app/models/api_client_authorization.rb @@ -319,7 +319,17 @@ class ApiClientAuthorization < ArvadosModel user.last_name = "from cluster #{remote_user_prefix}" end - user.save! + begin + user.save! + rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique + Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...") + # Some other request won the race: retry fetching the user record. + user = User.find_by_uuid(remote_user['uuid']) + if !user + Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}") + return nil + end + end if user.is_invited && !remote_user['is_invited'] # Remote user is not "invited" state, they should be unsetup, which @@ -364,12 +374,24 @@ class ApiClientAuthorization < ArvadosModel exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh, remote_token.andand['expires_at']].compact.min scopes = remote_token.andand['scopes'] || ['all'] - auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth| - auth.user = user - auth.api_token = stored_secret - auth.api_client_id = 0 - auth.scopes = scopes - auth.expires_at = exp + begin + retries ||= 0 + auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth| + auth.user = user + auth.api_token = stored_secret + auth.api_client_id = 0 + auth.scopes = scopes + auth.expires_at = exp + end + rescue ActiveRecord::RecordNotUnique + Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...") + # Some other request won the race: retry just once before erroring out + if (retries += 1) <= 1 + retry + else + Rails.logger.warn("cannot find or create cached remote token #{token_uuid}") + return nil + end end auth.update_attributes!(user: user, api_token: stored_secret, diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb index 1bbe8cc661..a98cde4446 100644 --- a/services/api/app/models/collection.rb +++ b/services/api/app/models/collection.rb @@ -44,8 +44,8 @@ class Collection < ArvadosModel t.add :description t.add :properties t.add :portable_data_hash - t.add :signed_manifest_text, as: :manifest_text t.add :manifest_text, as: :unsigned_manifest_text + t.add :manifest_text, as: :manifest_text t.add :replication_desired t.add :replication_confirmed t.add :replication_confirmed_at @@ -71,15 +71,11 @@ class Collection < ArvadosModel def self.attributes_required_columns super.merge( - # If we don't list manifest_text explicitly, the - # params[:select] code gets confused by the way we - # expose signed_manifest_text as manifest_text in the - # API response, and never let clients select the - # manifest_text column. - # - # We need trash_at and is_trashed to determine the - # correct timestamp in signed_manifest_text. - 'manifest_text' => ['manifest_text', 'trash_at', 'is_trashed'], + # If we don't list unsigned_manifest_text explicitly, + # the params[:select] code gets confused by the way we + # expose manifest_text as unsigned_manifest_text in + # the API response, and never let clients select the + # unsigned_manifest_text column. 'unsigned_manifest_text' => ['manifest_text'], 'name' => ['name'], ) @@ -406,7 +402,7 @@ class Collection < ArvadosModel end end - def signed_manifest_text + def signed_manifest_text_only_for_tests if !has_attribute? :manifest_text return nil elsif is_trashed @@ -415,11 +411,11 @@ class Collection < ArvadosModel token = Thread.current[:token] exp = [db_current_time.to_i + Rails.configuration.Collections.BlobSigningTTL.to_i, trash_at].compact.map(&:to_i).min - self.class.sign_manifest manifest_text, token, exp + self.class.sign_manifest_only_for_tests manifest_text, token, exp end end - def self.sign_manifest manifest, token, exp=nil + def self.sign_manifest_only_for_tests manifest, token, exp=nil if exp.nil? exp = db_current_time.to_i + Rails.configuration.Collections.BlobSigningTTL.to_i end diff --git a/services/api/db/migrate/20210816191509_drop_fts_index.rb b/services/api/db/migrate/20210816191509_drop_fts_index.rb new file mode 100644 index 0000000000..4ee1f55a37 --- /dev/null +++ b/services/api/db/migrate/20210816191509_drop_fts_index.rb @@ -0,0 +1,31 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +class DropFtsIndex < ActiveRecord::Migration[5.2] + def fts_indexes + { + "collections" => "collections_full_text_search_idx", + "container_requests" => "container_requests_full_text_search_idx", + "groups" => "groups_full_text_search_idx", + "jobs" => "jobs_full_text_search_idx", + "pipeline_instances" => "pipeline_instances_full_text_search_idx", + "pipeline_templates" => "pipeline_templates_full_text_search_idx", + "workflows" => "workflows_full_text_search_idx", + } + end + + def up + fts_indexes.keys.each do |t| + i = fts_indexes[t] + execute "DROP INDEX IF EXISTS #{i}" + end + end + + def down + fts_indexes.keys.each do |t| + i = fts_indexes[t] + execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector})" + end + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index 2bca887212..2f77483356 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -238,29 +238,6 @@ SET default_tablespace = ''; SET default_with_oids = false; --- --- Name: groups; Type: TABLE; Schema: public; Owner: - --- - -CREATE TABLE public.groups ( - id integer NOT NULL, - uuid character varying(255), - owner_uuid character varying(255), - created_at timestamp without time zone NOT NULL, - modified_by_client_uuid character varying(255), - modified_by_user_uuid character varying(255), - modified_at timestamp without time zone, - name character varying(255) NOT NULL, - description character varying(524288), - updated_at timestamp without time zone NOT NULL, - group_class character varying(255), - trash_at timestamp without time zone, - is_trashed boolean DEFAULT false NOT NULL, - delete_at timestamp without time zone, - properties jsonb DEFAULT '{}'::jsonb -); - - -- -- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: - -- @@ -571,6 +548,29 @@ CREATE SEQUENCE public.containers_id_seq ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id; +-- +-- Name: groups; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.groups ( + id integer NOT NULL, + uuid character varying(255), + owner_uuid character varying(255), + created_at timestamp without time zone NOT NULL, + modified_by_client_uuid character varying(255), + modified_by_user_uuid character varying(255), + modified_at timestamp without time zone, + name character varying(255) NOT NULL, + description character varying(524288), + updated_at timestamp without time zone NOT NULL, + group_class character varying(255), + trash_at timestamp without time zone, + is_trashed boolean DEFAULT false NOT NULL, + delete_at timestamp without time zone, + properties jsonb DEFAULT '{}'::jsonb +); + + -- -- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: - -- @@ -1722,13 +1722,6 @@ CREATE INDEX authorized_keys_search_index ON public.authorized_keys USING btree CREATE INDEX collection_index_on_properties ON public.collections USING gin (properties); --- --- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000))); - - -- -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -1743,13 +1736,6 @@ CREATE INDEX collections_search_index ON public.collections USING btree (owner_u CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops); --- --- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000))); - - -- -- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: - -- @@ -1785,13 +1771,6 @@ CREATE INDEX containers_search_index ON public.containers USING btree (uuid, own CREATE INDEX group_index_on_properties ON public.groups USING gin (properties); --- --- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000))); - - -- -- Name: groups_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2779,13 +2758,6 @@ CREATE UNIQUE INDEX index_workflows_on_uuid ON public.workflows USING btree (uui CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, job_uuid, created_by_job_task_uuid); --- --- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000))); - - -- -- Name: jobs_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2877,13 +2849,6 @@ CREATE INDEX permission_target ON public.materialized_permissions USING btree (t CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid); --- --- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000))); - - -- -- Name: pipeline_instances_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2905,13 +2870,6 @@ CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instance CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_templates USING btree (owner_uuid, name); --- --- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000))); - - -- -- Name: pipeline_templates_search_index; Type: INDEX; Schema: public; Owner: - -- @@ -2968,13 +2926,6 @@ CREATE INDEX users_search_index ON public.users USING btree (uuid, owner_uuid, m CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, hostname); --- --- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: - --- - -CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000))); - - -- -- Name: workflows_search_idx; Type: INDEX; Schema: public; Owner: - -- @@ -3194,6 +3145,7 @@ INSERT INTO "schema_migrations" (version) VALUES ('20201202174753'), ('20210108033940'), ('20210126183521'), -('20210621204455'); +('20210621204455'), +('20210816191509'); diff --git a/services/api/lib/record_filters.rb b/services/api/lib/record_filters.rb index 5688ca6140..409e48a6f0 100644 --- a/services/api/lib/record_filters.rb +++ b/services/api/lib/record_filters.rb @@ -31,7 +31,10 @@ module RecordFilters model_table_name = model_class.table_name filters.each do |filter| attrs_in, operator, operand = filter - if attrs_in == 'any' && operator != '@@' + if operator == '@@' + raise ArgumentError.new("Full text search operator is no longer supported") + end + if attrs_in == 'any' attrs = model_class.searchable_columns(operator) elsif attrs_in.is_a? Array attrs = attrs_in @@ -44,9 +47,10 @@ module RecordFilters raise ArgumentError.new("Invalid operator '#{operator}' (#{operator.class}) in filter") end + operator = operator.downcase cond_out = [] - if attrs_in == 'any' && (operator.casecmp('ilike').zero? || operator.casecmp('like').zero?) && (operand.is_a? String) && operand.match('^[%].*[%]$') + if attrs_in == 'any' && (operator == 'ilike' || operator == 'like') && (operand.is_a? String) && operand.match('^[%].*[%]$') # Trigram index search cond_out << model_class.full_text_trgm + " #{operator} ?" param_out << operand @@ -54,22 +58,6 @@ module RecordFilters attrs = [] end - if operator == '@@' - # Full-text search - if attrs_in != 'any' - raise ArgumentError.new("Full text search on individual columns is not supported") - end - if operand.is_a? Array - raise ArgumentError.new("Full text search not supported for array operands") - end - - # Skip the generic per-column operator loop below - attrs = [] - # Use to_tsquery since plainto_tsquery does not support prefix - # search. And, split operand and join the words with ' & ' - cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)" - param_out << operand.split.join(' & ') - end attrs.each do |attr| subproperty = attr.split(".", 2) @@ -98,9 +86,9 @@ module RecordFilters end # jsonb search - case operator.downcase + case operator when '=', '!=' - not_in = if operator.downcase == "!=" then "NOT " else "" end + not_in = if operator == "!=" then "NOT " else "" end cond_out << "#{not_in}(#{attr_table_name}.#{attr} @> ?::jsonb)" param_out << SafeJSON.dump({proppath => operand}) when 'in' @@ -147,7 +135,7 @@ module RecordFilters else raise ArgumentError.new("Invalid operator for subproperty search '#{operator}'") end - elsif operator.downcase == "exists" + elsif operator == "exists" if col.type != :jsonb raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter") end @@ -155,11 +143,12 @@ module RecordFilters cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)" param_out << operand else - if !attr_model_class.searchable_columns(operator).index attr + if !attr_model_class.searchable_columns(operator).index(attr) && + !(col.andand.type == :jsonb && ['contains', '=', '<>', '!='].index(operator)) raise ArgumentError.new("Invalid attribute '#{attr}' in filter") end - case operator.downcase + case operator when '=', '<', '<=', '>', '>=', '!=', 'like', 'ilike' attr_type = attr_model_class.attribute_column(attr).type operator = '<>' if operator == '!=' @@ -240,6 +229,26 @@ module RecordFilters end end cond_out << cond.join(' OR ') + when 'contains' + if col.andand.type != :jsonb + raise ArgumentError.new("Invalid attribute '#{attr}' for '#{operator}' operator") + end + if operand == [] + raise ArgumentError.new("Invalid operand '#{operand.inspect}' for '#{operator}' operator") + end + operand = [operand] unless operand.is_a? Array + operand.each do |op| + if !op.is_a?(String) + raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)") + end + end + # We use jsonb_exists_all(a,b) instead of "a ?& b" because + # the pg gem thinks "?" is a bind var. And we use string + # interpolation instead of param_out because the pg gem + # flattens param_out / doesn't support passing arrays as + # bind vars. + q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',') + cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])" else raise ArgumentError.new("Invalid operator '#{operator}'") end diff --git a/services/api/test/fixtures/jobs.yml b/services/api/test/fixtures/jobs.yml index 9b067aa263..ab76417902 100644 --- a/services/api/test/fixtures/jobs.yml +++ b/services/api/test/fixtures/jobs.yml @@ -521,7 +521,7 @@ running_job_in_publicly_accessible_project: uuid: zzzzz-8i9sb-n7omg50bvt0m1nf owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0 modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz - repository: active/foo + repository: active/bar script: running_job_script script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577 state: Running diff --git a/services/api/test/fixtures/pipeline_instances.yml b/services/api/test/fixtures/pipeline_instances.yml index 0865503281..9621b3effc 100644 --- a/services/api/test/fixtures/pipeline_instances.yml +++ b/services/api/test/fixtures/pipeline_instances.yml @@ -111,12 +111,9 @@ has_job: components_is_jobspec: # Helps test that clients cope with funny-shaped components. # For an example, see #3321. - uuid: zzzzz-d1hrv-jobspeccomponts - created_at: <%= 30.minute.ago.to_s(:db) %> + uuid: zzzzz-d1hrv-1yfj61234abcdk4 + created_at: <%= 2.minute.ago.to_s(:db) %> owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz - created_at: 2014-04-14 12:35:04 -0400 - updated_at: 2014-04-14 12:35:04 -0400 - modified_at: 2014-04-14 12:35:04 -0400 modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz state: RunningOnServer diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb index 1ca2dd1dc1..6c923ff38d 100644 --- a/services/api/test/functional/arvados/v1/collections_controller_test.rb +++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb @@ -32,8 +32,7 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase end end - def assert_unsigned_manifest resp, label='' - txt = resp['unsigned_manifest_text'] + def assert_unsigned_manifest txt, label='' assert_not_nil(txt, "#{label} unsigned_manifest_text was nil") locs = 0 txt.scan(/ [[:xdigit:]]{32}\S*/) do |tok| @@ -66,24 +65,16 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase "past version not included on index") end - test "collections.get returns signed locators, and no unsigned_manifest_text" do + test "collections.get returns unsigned locators, and no unsigned_manifest_text" do permit_unsigned_manifests authorize_with :active get :show, params: {id: collections(:foo_file).uuid} assert_response :success - assert_signed_manifest json_response['manifest_text'], 'foo_file' + assert_unsigned_manifest json_response["manifest_text"], 'foo_file' refute_includes json_response, 'unsigned_manifest_text' end ['v1token', 'v2token'].each do |token_method| - test "correct signatures are given for #{token_method}" do - token = api_client_authorizations(:active).send(token_method) - authorize_with_token token - get :show, params: {id: collections(:foo_file).uuid} - assert_response :success - assert_signed_manifest json_response['manifest_text'], 'foo_file', token: token - end - test "signatures with #{token_method} are accepted" do token = api_client_authorizations(:active).send(token_method) signed = Blob.sign_locator( @@ -98,11 +89,11 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase }, } assert_response :success - assert_signed_manifest json_response['manifest_text'], 'updated', token: token + assert_unsigned_manifest json_response['manifest_text'], 'updated' end end - test "index with manifest_text selected returns signed locators" do + test "index with manifest_text selected returns unsigned locators" do columns = %w(uuid owner_uuid manifest_text) authorize_with :active get :index, params: {select: columns} @@ -112,7 +103,7 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase json_response["items"].each do |coll| assert_equal(coll.keys - ['kind'], columns, "Collections index did not respect selected columns") - assert_signed_manifest coll['manifest_text'], coll['uuid'] + assert_unsigned_manifest coll['manifest_text'], coll['uuid'] end end @@ -125,7 +116,8 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase json_response["items"].each do |coll| assert_equal(coll.keys - ['kind'], ['unsigned_manifest_text'], "Collections index did not respect selected columns") - locs += assert_unsigned_manifest coll, coll['uuid'] + assert_nil coll['manifest_text'] + locs += assert_unsigned_manifest coll['unsigned_manifest_text'], coll['uuid'] end assert_operator locs, :>, 0, "no locators found in any manifests" end @@ -338,7 +330,7 @@ EOS assert_not_nil assigns(:object) resp = assigns(:object) assert_equal foo_collection[:portable_data_hash], resp[:portable_data_hash] - assert_signed_manifest resp[:manifest_text] + assert_unsigned_manifest resp[:manifest_text] # The manifest in the response will have had permission hints added. # Remove any permission hints in the response before comparing it to the source. @@ -388,7 +380,7 @@ EOS authorize_with :active manifest_text = ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo.txt\n" if !unsigned - manifest_text = Collection.sign_manifest manifest_text, api_token(:active) + manifest_text = Collection.sign_manifest_only_for_tests manifest_text, api_token(:active) end post :create, params: { collection: { @@ -594,10 +586,10 @@ EOS assert_not_nil assigns(:object) resp = JSON.parse(@response.body) assert_equal manifest_uuid, resp['portable_data_hash'] - # All of the locators in the output must be signed. + # All of the signatures in the output must be valid. resp['manifest_text'].lines.each do |entry| m = /([[:xdigit:]]{32}\+\S+)/.match(entry) - if m + if m && m[0].index('+A') assert Blob.verify_signature m[0], signing_opts end end @@ -641,10 +633,10 @@ EOS assert_not_nil assigns(:object) resp = JSON.parse(@response.body) assert_equal manifest_uuid, resp['portable_data_hash'] - # All of the locators in the output must be signed. + # All of the signatures in the output must be valid. resp['manifest_text'].lines.each do |entry| m = /([[:xdigit:]]{32}\+\S+)/.match(entry) - if m + if m && m[0].index('+A') assert Blob.verify_signature m[0], signing_opts end end @@ -746,50 +738,6 @@ EOS assert_equal manifest_text, stripped_manifest end - test "multiple signed locators per line" do - permit_unsigned_manifests - authorize_with :active - locators = %w( - d41d8cd98f00b204e9800998ecf8427e+0 - acbd18db4cc2f85cedef654fccc4a4d8+3 - ea10d51bcf88862dbcc36eb292017dfd+45) - - signing_opts = { - key: Rails.configuration.Collections.BlobSigningKey, - api_token: api_token(:active), - } - - unsigned_manifest = [".", *locators, "0:0:foo.txt\n"].join(" ") - manifest_uuid = Digest::MD5.hexdigest(unsigned_manifest) + - '+' + - unsigned_manifest.length.to_s - - signed_locators = locators.map { |loc| Blob.sign_locator loc, signing_opts } - signed_manifest = [".", *signed_locators, "0:0:foo.txt\n"].join(" ") - - post :create, params: { - collection: { - manifest_text: signed_manifest, - portable_data_hash: manifest_uuid, - } - } - assert_response :success - assert_not_nil assigns(:object) - resp = JSON.parse(@response.body) - assert_equal manifest_uuid, resp['portable_data_hash'] - # All of the locators in the output must be signed. - # Each line is of the form "path locator locator ... 0:0:file.txt" - # entry.split[1..-2] will yield just the tokens in the middle of the line - returned_locator_count = 0 - resp['manifest_text'].lines.each do |entry| - entry.split[1..-2].each do |tok| - returned_locator_count += 1 - assert Blob.verify_signature tok, signing_opts - end - end - assert_equal locators.count, returned_locator_count - end - test 'Reject manifest with unsigned blob' do permit_unsigned_manifests false authorize_with :active @@ -1455,4 +1403,18 @@ EOS assert_response :success assert_equal col.version, json_response['version'], 'Trashing a collection should not create a new version' end + + ["storage_classes_desired", "storage_classes_confirmed"].each do |attr| + test "filter collections by #{attr}" do + authorize_with(:active) + get :index, params: { + filters: [[attr, "=", '["default"]']] + } + assert_response :success + assert_not_equal 0, json_response["items"].length + json_response["items"].each do |c| + assert_equal ["default"], c[attr] + end + end + end end diff --git a/services/api/test/functional/arvados/v1/filters_test.rb b/services/api/test/functional/arvados/v1/filters_test.rb index 26270b1c3c..dd8eeaa7be 100644 --- a/services/api/test/functional/arvados/v1/filters_test.rb +++ b/services/api/test/functional/arvados/v1/filters_test.rb @@ -29,34 +29,14 @@ class Arvados::V1::FiltersTest < ActionController::TestCase json_response['errors'].join(' ')) end - test 'error message for full text search on a specific column' do + test 'error message for unsupported full text search' do @controller = Arvados::V1::CollectionsController.new authorize_with :active get :index, params: { filters: [['uuid', '@@', 'abcdef']], } assert_response 422 - assert_match(/not supported/, json_response['errors'].join(' ')) - end - - test 'difficult characters in full text search' do - @controller = Arvados::V1::CollectionsController.new - authorize_with :active - get :index, params: { - filters: [['any', '@@', 'a|b"c']], - } - assert_response :success - # (Doesn't matter so much which results are returned.) - end - - test 'array operand in full text search' do - @controller = Arvados::V1::CollectionsController.new - authorize_with :active - get :index, params: { - filters: [['any', '@@', ['abc', 'def']]], - } - assert_response 422 - assert_match(/not supported/, json_response['errors'].join(' ')) + assert_match(/no longer supported/, json_response['errors'].join(' ')) end test 'api responses provide timestamps with nanoseconds' do @@ -100,58 +80,6 @@ class Arvados::V1::FiltersTest < ActionController::TestCase end end - test "full text search with count='none'" do - @controller = Arvados::V1::GroupsController.new - authorize_with :admin - - get :contents, params: { - format: :json, - count: 'none', - limit: 1000, - filters: [['any', '@@', Rails.configuration.ClusterID]], - } - - assert_response :success - - all_objects = Hash.new(0) - json_response['items'].map{|o| o['kind']}.each{|t| all_objects[t] += 1} - - assert_equal true, all_objects['arvados#group']>0 - assert_equal true, all_objects['arvados#job']>0 - assert_equal true, all_objects['arvados#pipelineInstance']>0 - assert_equal true, all_objects['arvados#pipelineTemplate']>0 - - # Perform test again mimicking a second page request with: - # last_object_class = PipelineInstance - # and hence groups and jobs should not be included in the response - # offset = 5, which means first 5 pipeline instances were already received in page 1 - # and hence the remaining pipeline instances and all other object types should be included in the response - - @test_counter = 0 # Reset executed action counter - - @controller = Arvados::V1::GroupsController.new - - get :contents, params: { - format: :json, - count: 'none', - limit: 1000, - offset: '5', - last_object_class: 'PipelineInstance', - filters: [['any', '@@', Rails.configuration.ClusterID]], - } - - assert_response :success - - second_page = Hash.new(0) - json_response['items'].map{|o| o['kind']}.each{|t| second_page[t] += 1} - - assert_equal false, second_page.include?('arvados#group') - assert_equal false, second_page.include?('arvados#job') - assert_equal true, second_page['arvados#pipelineInstance']>0 - assert_equal all_objects['arvados#pipelineInstance'], second_page['arvados#pipelineInstance']+5 - assert_equal true, second_page['arvados#pipelineTemplate']>0 - end - [['prop1', '=', 'value1', [:collection_with_prop1_value1], [:collection_with_prop1_value2, :collection_with_prop2_1]], ['prop1', '!=', 'value1', [:collection_with_prop1_value2, :collection_with_prop2_1], [:collection_with_prop1_value1]], ['prop1', 'exists', true, [:collection_with_prop1_value1, :collection_with_prop1_value2, :collection_with_prop1_value3, :collection_with_prop1_other1], [:collection_with_prop2_1]], @@ -319,4 +247,51 @@ class Arvados::V1::FiltersTest < ActionController::TestCase assert_includes(found, collections(:replication_desired_2_unconfirmed).uuid) assert_includes(found, collections(:replication_desired_2_confirmed_2).uuid) end + + [ + [1, "foo"], + [1, ["foo"]], + [1, ["bar"]], + [1, ["bar", "foo"]], + [0, ["foo", "qux"]], + [0, ["qux"]], + [nil, []], + [nil, [[]]], + [nil, [["bogus"]]], + [nil, [{"foo" => "bar"}]], + [nil, {"foo" => "bar"}], + ].each do |results, operand| + test "storage_classes_desired contains #{operand.inspect}" do + @controller = Arvados::V1::CollectionsController.new + authorize_with(:active) + c = Collection.create!( + manifest_text: "", + storage_classes_desired: ["foo", "bar", "baz"]) + get :index, params: { + filters: [["storage_classes_desired", "contains", operand]], + } + if results.nil? + assert_response 422 + next + end + assert_response :success + assert_equal results, json_response["items"].length + if results > 0 + assert_equal c.uuid, json_response["items"][0]["uuid"] + end + end + end + + test "collections properties contains top level key" do + @controller = Arvados::V1::CollectionsController.new + authorize_with(:active) + get :index, params: { + filters: [["properties", "contains", "prop1"]], + } + assert_response :success + assert_not_empty json_response["items"] + json_response["items"].each do |c| + assert c["properties"].has_key?("prop1") + end + end end diff --git a/services/api/test/integration/collections_api_test.rb b/services/api/test/integration/collections_api_test.rb index 73cbad6430..baca6e2781 100644 --- a/services/api/test/integration/collections_api_test.rb +++ b/services/api/test/integration/collections_api_test.rb @@ -224,7 +224,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest test "create collection, update manifest, and search with filename" do # create collection - signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) + signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) post "/arvados/v1/collections", params: { format: :json, @@ -241,7 +241,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest search_using_filter 'my_test_file.txt', 1 # update the collection's manifest text - signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_updated_test_file.txt\n", api_token(:active)) + signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_updated_test_file.txt\n", api_token(:active)) put "/arvados/v1/collections/#{created['uuid']}", params: { format: :json, @@ -373,78 +373,9 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest end end - test "search collection using full text search" do - # create collection to be searched for - signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active)) - post "/arvados/v1/collections", - params: { - format: :json, - collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json, - }, - headers: auth(:active) - assert_response :success - assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt') - - # search using the filename - search_using_full_text_search 'subdir2', 0 - search_using_full_text_search 'subdir2:*', 1 - search_using_full_text_search 'subdir2/subdir3/subdir4', 1 - search_using_full_text_search 'file4:*', 1 - search_using_full_text_search 'file4_in_subdir4.txt', 1 - search_using_full_text_search 'subdir2 file4:*', 0 # first word is incomplete - search_using_full_text_search 'subdir2/subdir3/subdir4 file4:*', 1 - search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1 - search_using_full_text_search 'ile4', 0 # not a prefix match - end - - def search_using_full_text_search search_filter, expected_items - get '/arvados/v1/collections', - params: {:filters => [['any', '@@', search_filter]].to_json}, - headers: auth(:active) - assert_response :success - response_items = json_response['items'] - assert_not_nil response_items - if expected_items == 0 - assert_empty response_items - else - refute_empty response_items - first_item = response_items.first - assert_not_nil first_item - end - end - - # search for the filename in the file_names column and expect error - test "full text search not supported for individual columns" do - get '/arvados/v1/collections', - params: {:filters => [['name', '@@', 'General']].to_json}, - headers: auth(:active) - assert_response 422 - end - - [ - 'quick fox', - 'quick_brown fox', - 'brown_ fox', - 'fox dogs', - ].each do |search_filter| - test "full text search ignores special characters and finds with filter #{search_filter}" do - # description: The quick_brown_fox jumps over the lazy_dog - # full text search treats '_' as space apparently - get '/arvados/v1/collections', - params: {:filters => [['any', '@@', search_filter]].to_json}, - headers: auth(:active) - assert_response 200 - response_items = json_response['items'] - assert_not_nil response_items - first_item = response_items.first - refute_empty first_item - assert_equal first_item['description'], 'The quick_brown_fox jumps over the lazy_dog' - end - end - test "create and get collection with properties" do # create collection to be searched for - signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) + signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) post "/arvados/v1/collections", params: { format: :json, @@ -470,7 +401,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest test "create collection and update it with json encoded hash properties" do # create collection to be searched for - signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) + signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) post "/arvados/v1/collections", params: { format: :json, @@ -500,7 +431,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest Rails.configuration.Collections.CollectionVersioning = true Rails.configuration.Collections.PreserveVersionIfIdle = -1 # Disable auto versioning - signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) + signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active)) post "/arvados/v1/collections", params: { format: :json, diff --git a/services/api/test/integration/groups_test.rb b/services/api/test/integration/groups_test.rb index aa67166f7e..e76f2b5406 100644 --- a/services/api/test/integration/groups_test.rb +++ b/services/api/test/integration/groups_test.rb @@ -64,46 +64,6 @@ class GroupsTest < ActionDispatch::IntegrationTest end end - [ - ['Collection_', true], # collections and pipelines templates - ['hash', true], # pipeline templates - ['fa7aeb5140e2848d39b', false], # script_parameter of pipeline instances - ['fa7aeb5140e2848d39b:*', true], # script_parameter of pipeline instances - ['project pipeline', true], # finds "Completed pipeline in A Project" - ['project pipeli:*', true], # finds "Completed pipeline in A Project" - ['proje pipeli:*', false], # first word is incomplete, so no prefix match - ['no-such-thing', false], # script_parameter of pipeline instances - ].each do |search_filter, expect_results| - test "full text search of group-owned objects for #{search_filter}" do - get "/arvados/v1/groups/contents", - params: { - id: groups(:aproject).uuid, - limit: 5, - :filters => [['any', '@@', search_filter]].to_json - }, - headers: auth(:active) - assert_response :success - if expect_results - refute_empty json_response['items'] - json_response['items'].each do |item| - assert item['uuid'] - assert_equal groups(:aproject).uuid, item['owner_uuid'] - end - else - assert_empty json_response['items'] - end - end - end - - test "full text search is not supported for individual columns" do - get "/arvados/v1/groups/contents", - params: { - :filters => [['name', '@@', 'Private']].to_json - }, - headers: auth(:active) - assert_response 422 - end - test "group contents with include trash collections" do get "/arvados/v1/groups/contents", params: { diff --git a/services/api/test/unit/arvados_model_test.rb b/services/api/test/unit/arvados_model_test.rb index 64f7807135..1e2e08059e 100644 --- a/services/api/test/unit/arvados_model_test.rb +++ b/services/api/test/unit/arvados_model_test.rb @@ -155,51 +155,6 @@ class ArvadosModelTest < ActiveSupport::TestCase end end - test "full text search index exists on models" do - indexes = {} - conn = ActiveRecord::Base.connection - conn.exec_query("SELECT i.relname as indname, - i.relowner as indowner, - idx.indrelid::regclass::text as table, - am.amname as indam, - idx.indkey, - ARRAY( - SELECT pg_get_indexdef(idx.indexrelid, k + 1, true) - FROM generate_subscripts(idx.indkey, 1) as k - ORDER BY k - ) as keys, - idx.indexprs IS NOT NULL as indexprs, - idx.indpred IS NOT NULL as indpred - FROM pg_index as idx - JOIN pg_class as i - ON i.oid = idx.indexrelid - JOIN pg_am as am - ON i.relam = am.oid - JOIN pg_namespace as ns - ON ns.oid = i.relnamespace - AND ns.nspname = ANY(current_schemas(false))").each do |idx| - if idx['keys'].match(/to_tsvector/) - indexes[idx['table']] ||= [] - indexes[idx['table']] << idx - end - end - fts_tables = ["collections", "container_requests", "groups", "jobs", - "pipeline_instances", "pipeline_templates", "workflows"] - fts_tables.each do |table| - table_class = table.classify.constantize - if table_class.respond_to?('full_text_searchable_columns') - expect = table_class.full_text_searchable_columns - ok = false - indexes[table].andand.each do |idx| - if expect == idx['keys'].scan(/COALESCE\(([A-Za-z_]+)/).flatten - ok = true - end - end - assert ok, "#{table} has no full-text index\nexpect: #{expect.inspect}\nfound: #{indexes[table].inspect}" - end - end - end - [ %w[collections collections_trgm_text_search_idx], %w[container_requests container_requests_trgm_text_search_idx], diff --git a/services/api/test/unit/collection_performance_test.rb b/services/api/test/unit/collection_performance_test.rb index 4efc947dd2..bbae49f4a2 100644 --- a/services/api/test/unit/collection_performance_test.rb +++ b/services/api/test/unit/collection_performance_test.rb @@ -42,10 +42,7 @@ class CollectionModelPerformanceTest < ActiveSupport::TestCase c = time_block 'read' do Collection.find_by_uuid(c.uuid) end - time_block 'sign' do - c.signed_manifest_text - end - time_block 'sign + render' do + time_block 'render' do c.as_api_response(nil) end loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3', diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb index 8b8edbc153..de0f1d360c 100644 --- a/services/api/test/unit/collection_test.rb +++ b/services/api/test/unit/collection_test.rb @@ -856,7 +856,7 @@ class CollectionTest < ActiveSupport::TestCase test "clear replication_confirmed* when introducing a new block in manifest" do c = collections(:replication_desired_2_confirmed_2) act_as_user users(:active) do - assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text) + assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text_only_for_tests) assert_nil c.replication_confirmed assert_nil c.replication_confirmed_at end @@ -865,7 +865,7 @@ class CollectionTest < ActiveSupport::TestCase test "don't clear replication_confirmed* when just renaming a file" do c = collections(:replication_desired_2_confirmed_2) act_as_user users(:active) do - new_manifest = c.signed_manifest_text.sub(':bar', ':foo') + new_manifest = c.signed_manifest_text_only_for_tests.sub(':bar', ':foo') assert c.update_attributes(manifest_text: new_manifest) assert_equal 2, c.replication_confirmed assert_not_nil c.replication_confirmed_at @@ -875,13 +875,13 @@ class CollectionTest < ActiveSupport::TestCase test "don't clear replication_confirmed* when just deleting a data block" do c = collections(:replication_desired_2_confirmed_2) act_as_user users(:active) do - new_manifest = c.signed_manifest_text + new_manifest = c.signed_manifest_text_only_for_tests new_manifest.sub!(/ \S+:bar/, '') new_manifest.sub!(/ acbd\S+/, '') # Confirm that we did just remove a block from the manifest (if # not, this test would pass without testing the relevant case): - assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length + assert_operator new_manifest.length+40, :<, c.signed_manifest_text_only_for_tests.length assert c.update_attributes(manifest_text: new_manifest) assert_equal 2, c.replication_confirmed @@ -895,7 +895,7 @@ class CollectionTest < ActiveSupport::TestCase c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo') c.update_attributes! trash_at: (t0 + 1.hours) c.reload - sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i + sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text_only_for_tests)[1].to_i assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i end end @@ -905,7 +905,7 @@ class CollectionTest < ActiveSupport::TestCase c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo', trash_at: db_current_time + 1.years) - sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i + sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text_only_for_tests)[1].to_i expect_max_sig_exp = db_current_time.to_i + Rails.configuration.Collections.BlobSigningTTL.to_i assert_operator c.trash_at.to_i, :>, expect_max_sig_exp assert_operator sig_exp.to_i, :<=, expect_max_sig_exp diff --git a/services/arv-git-httpd/auth_handler_test.go b/services/arv-git-httpd/auth_handler_test.go index 4e1a47dcb2..688b256dcb 100644 --- a/services/arv-git-httpd/auth_handler_test.go +++ b/services/arv-git-httpd/auth_handler_test.go @@ -26,14 +26,6 @@ type AuthHandlerSuite struct { cluster *arvados.Cluster } -func (s *AuthHandlerSuite) SetUpSuite(c *check.C) { - arvadostest.StartAPI() -} - -func (s *AuthHandlerSuite) TearDownSuite(c *check.C) { - arvadostest.StopAPI() -} - func (s *AuthHandlerSuite) SetUpTest(c *check.C) { arvadostest.ResetEnv() repoRoot, err := filepath.Abs("../api/tmp/git/test") diff --git a/services/arv-git-httpd/integration_test.go b/services/arv-git-httpd/integration_test.go index 7da85ee747..93a46e2248 100644 --- a/services/arv-git-httpd/integration_test.go +++ b/services/arv-git-httpd/integration_test.go @@ -33,14 +33,6 @@ type IntegrationSuite struct { cluster *arvados.Cluster } -func (s *IntegrationSuite) SetUpSuite(c *check.C) { - arvadostest.StartAPI() -} - -func (s *IntegrationSuite) TearDownSuite(c *check.C) { - arvadostest.StopAPI() -} - func (s *IntegrationSuite) SetUpTest(c *check.C) { arvadostest.ResetEnv() diff --git a/services/crunch-dispatch-local/crunch-dispatch-local_test.go b/services/crunch-dispatch-local/crunch-dispatch-local_test.go index 92b8d2adcd..7e8c42c25c 100644 --- a/services/crunch-dispatch-local/crunch-dispatch-local_test.go +++ b/services/crunch-dispatch-local/crunch-dispatch-local_test.go @@ -39,15 +39,10 @@ var initialArgs []string func (s *TestSuite) SetUpSuite(c *C) { initialArgs = os.Args - arvadostest.StartAPI() runningCmds = make(map[string]*exec.Cmd) logrus.SetFormatter(&logrus.TextFormatter{DisableColors: true}) } -func (s *TestSuite) TearDownSuite(c *C) { - arvadostest.StopAPI() -} - func (s *TestSuite) SetUpTest(c *C) { args := []string{"crunch-dispatch-local"} os.Args = args diff --git a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go index e7a89db23c..cf83257dad 100644 --- a/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go +++ b/services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go @@ -42,7 +42,8 @@ type IntegrationSuite struct { } func (s *IntegrationSuite) SetUpTest(c *C) { - arvadostest.StartAPI() + arvadostest.ResetEnv() + arvadostest.ResetDB(c) os.Setenv("ARVADOS_API_TOKEN", arvadostest.Dispatch1Token) s.disp = Dispatcher{} s.disp.cluster = &arvados.Cluster{} @@ -52,7 +53,7 @@ func (s *IntegrationSuite) SetUpTest(c *C) { func (s *IntegrationSuite) TearDownTest(c *C) { arvadostest.ResetEnv() - arvadostest.StopAPI() + arvadostest.ResetDB(c) } type slurmFake struct { diff --git a/services/keep-balance/integration_test.go b/services/keep-balance/integration_test.go index 52e6149158..a6cc328104 100644 --- a/services/keep-balance/integration_test.go +++ b/services/keep-balance/integration_test.go @@ -38,7 +38,6 @@ func (s *integrationSuite) SetUpSuite(c *check.C) { c.Skip("-short") } arvadostest.ResetEnv() - arvadostest.StartAPI() arvadostest.StartKeep(4, true) arv, err := arvadosclient.MakeArvadosClient() @@ -62,7 +61,6 @@ func (s *integrationSuite) TearDownSuite(c *check.C) { c.Skip("-short") } arvadostest.StopKeep(4) - arvadostest.StopAPI() } func (s *integrationSuite) SetUpTest(c *check.C) { diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go index a65a48892a..21d7672087 100644 --- a/services/keep-web/server_test.go +++ b/services/keep-web/server_test.go @@ -410,7 +410,7 @@ func (s *IntegrationSuite) TestMetrics(c *check.C) { } func (s *IntegrationSuite) SetUpSuite(c *check.C) { - arvadostest.StartAPI() + arvadostest.ResetDB(c) arvadostest.StartKeep(2, true) arv, err := arvadosclient.MakeArvadosClient() @@ -426,7 +426,6 @@ func (s *IntegrationSuite) SetUpSuite(c *check.C) { func (s *IntegrationSuite) TearDownSuite(c *check.C) { arvadostest.StopKeep(2) - arvadostest.StopAPI() } func (s *IntegrationSuite) SetUpTest(c *check.C) { diff --git a/services/keepproxy/keepproxy_test.go b/services/keepproxy/keepproxy_test.go index 4bdb420202..052109bf29 100644 --- a/services/keepproxy/keepproxy_test.go +++ b/services/keepproxy/keepproxy_test.go @@ -76,7 +76,6 @@ func closeListener() { } func (s *ServerRequiredSuite) SetUpSuite(c *C) { - arvadostest.StartAPI() arvadostest.StartKeep(2, false) } @@ -86,11 +85,9 @@ func (s *ServerRequiredSuite) SetUpTest(c *C) { func (s *ServerRequiredSuite) TearDownSuite(c *C) { arvadostest.StopKeep(2) - arvadostest.StopAPI() } func (s *ServerRequiredConfigYmlSuite) SetUpSuite(c *C) { - arvadostest.StartAPI() // config.yml defines 4 keepstores arvadostest.StartKeep(4, false) } @@ -101,11 +98,9 @@ func (s *ServerRequiredConfigYmlSuite) SetUpTest(c *C) { func (s *ServerRequiredConfigYmlSuite) TearDownSuite(c *C) { arvadostest.StopKeep(4) - arvadostest.StopAPI() } func (s *NoKeepServerSuite) SetUpSuite(c *C) { - arvadostest.StartAPI() // We need API to have some keep services listed, but the // services themselves should be unresponsive. arvadostest.StartKeep(2, false) @@ -116,10 +111,6 @@ func (s *NoKeepServerSuite) SetUpTest(c *C) { arvadostest.ResetEnv() } -func (s *NoKeepServerSuite) TearDownSuite(c *C) { - arvadostest.StopAPI() -} - func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool, kp *arvados.UploadDownloadRolePermissions) (*keepclient.KeepClient, *bytes.Buffer) { cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load() c.Assert(err, Equals, nil) diff --git a/services/keepstore/handler_test.go b/services/keepstore/handler_test.go index 897447dd11..16dcd2aaf6 100644 --- a/services/keepstore/handler_test.go +++ b/services/keepstore/handler_test.go @@ -23,6 +23,7 @@ import ( "os" "sort" "strings" + "sync/atomic" "time" "git.arvados.org/arvados.git/lib/config" @@ -367,6 +368,94 @@ func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) { } } +func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) { + s.cluster.Volumes = map[string]arvados.Volume{ + "zzzzz-nyw5e-111111111111111": { + Driver: "mock", + Replication: 1, + ReadOnly: true, + StorageClasses: map[string]bool{"class1": true}}, + } + c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil) + resp := IssueRequest(s.handler, + &RequestTester{ + method: "PUT", + uri: "/" + TestHash, + requestBody: TestBlock, + storageClasses: "class1", + }) + c.Check(resp.Code, check.Equals, FullError.HTTPCode) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0) +} + +func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) { + s.cluster.Volumes = map[string]arvados.Volume{ + "zzzzz-nyw5e-111111111111111": { + Driver: "mock", + Replication: 1, + StorageClasses: map[string]bool{"class1": true}}, + "zzzzz-nyw5e-121212121212121": { + Driver: "mock", + Replication: 1, + StorageClasses: map[string]bool{"class1": true, "class2": true}}, + "zzzzz-nyw5e-222222222222222": { + Driver: "mock", + Replication: 1, + StorageClasses: map[string]bool{"class2": true}}, + } + + for _, trial := range []struct { + setCounter uint32 // value to stuff vm.counter, to control offset + classes string // desired classes + put111 int // expected number of "put" ops on 11111... after 2x put reqs + put121 int // expected number of "put" ops on 12121... + put222 int // expected number of "put" ops on 22222... + cmp111 int // expected number of "compare" ops on 11111... after 2x put reqs + cmp121 int // expected number of "compare" ops on 12121... + cmp222 int // expected number of "compare" ops on 22222... + }{ + {0, "class1", + 1, 0, 0, + 2, 1, 0}, // first put compares on all vols with class2; second put succeeds after checking 121 + {0, "class2", + 0, 1, 0, + 0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121 + {0, "class1,class2", + 1, 1, 0, + 2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121 + {1, "class1,class2", + 0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121 + 2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121 + {0, "class1,class2,class404", + 1, 1, 0, + 2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121 + } { + c.Logf("%+v", trial) + s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{ + "class1": {}, + "class2": {}, + "class3": {}, + } + c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil) + atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter) + for i := 0; i < 2; i++ { + IssueRequest(s.handler, + &RequestTester{ + method: "PUT", + uri: "/" + TestHash, + requestBody: TestBlock, + storageClasses: trial.classes, + }) + } + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121) + c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222) + } +} + // Test TOUCH requests. func (s *HandlerSuite) TestTouchHandler(c *check.C) { c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil) diff --git a/services/keepstore/handlers.go b/services/keepstore/handlers.go index 2b469a13eb..910033ebb1 100644 --- a/services/keepstore/handlers.go +++ b/services/keepstore/handlers.go @@ -18,6 +18,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "git.arvados.org/arvados.git/sdk/go/arvados" @@ -741,6 +742,7 @@ func GetBlock(ctx context.Context, volmgr *RRVolumeManager, hash string, buf []b } type putProgress struct { + classNeeded map[string]bool classTodo map[string]bool mountUsed map[*VolumeMount]bool totalReplication int @@ -769,7 +771,7 @@ func (pr putProgress) ClassReplication() string { func (pr *putProgress) Add(mnt *VolumeMount) { if pr.mountUsed[mnt] { - logrus.Warnf("BUG? superfluous extra write to mount %s", mnt) + logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID) return } pr.mountUsed[mnt] = true @@ -780,6 +782,21 @@ func (pr *putProgress) Add(mnt *VolumeMount) { } } +func (pr *putProgress) Sub(mnt *VolumeMount) { + if !pr.mountUsed[mnt] { + logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID) + return + } + pr.mountUsed[mnt] = false + pr.totalReplication -= mnt.Replication + for class := range mnt.StorageClasses { + pr.classDone[class] -= mnt.Replication + if pr.classNeeded[class] { + pr.classTodo[class] = true + } + } +} + func (pr *putProgress) Done() bool { return len(pr.classTodo) == 0 && pr.totalReplication > 0 } @@ -800,47 +817,65 @@ func (pr *putProgress) Want(mnt *VolumeMount) bool { return false } -func newPutResult(classes []string) putProgress { +func (pr *putProgress) Copy() *putProgress { + cp := putProgress{ + classNeeded: pr.classNeeded, + classTodo: make(map[string]bool, len(pr.classTodo)), + classDone: make(map[string]int, len(pr.classDone)), + mountUsed: make(map[*VolumeMount]bool, len(pr.mountUsed)), + totalReplication: pr.totalReplication, + } + for k, v := range pr.classTodo { + cp.classTodo[k] = v + } + for k, v := range pr.classDone { + cp.classDone[k] = v + } + for k, v := range pr.mountUsed { + cp.mountUsed[k] = v + } + return &cp +} + +func newPutProgress(classes []string) putProgress { pr := putProgress{ - classTodo: make(map[string]bool, len(classes)), - classDone: map[string]int{}, - mountUsed: map[*VolumeMount]bool{}, + classNeeded: make(map[string]bool, len(classes)), + classTodo: make(map[string]bool, len(classes)), + classDone: map[string]int{}, + mountUsed: map[*VolumeMount]bool{}, } for _, c := range classes { if c != "" { + pr.classNeeded[c] = true pr.classTodo[c] = true } } return pr } -// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep. -// -// PutBlock(ctx, block, hash) -// Stores the BLOCK (identified by the content id HASH) in Keep. -// -// The MD5 checksum of the block must be identical to the content id HASH. -// If not, an error is returned. +// PutBlock stores the given block on one or more volumes. // -// PutBlock stores the BLOCK on the first Keep volume with free space. -// A failure code is returned to the user only if all volumes fail. +// The MD5 checksum of the block must match the given hash. // -// On success, PutBlock returns nil. -// On failure, it returns a KeepError with one of the following codes: +// The block is written to each writable volume (ordered by priority +// and then UUID, see volume.go) until at least one replica has been +// stored in each of the requested storage classes. // -// 500 Collision -// A different block with the same hash already exists on this -// Keep server. -// 422 MD5Fail -// The MD5 hash of the BLOCK does not match the argument HASH. -// 503 Full -// There was not enough space left in any Keep volume to store -// the object. -// 500 Fail -// The object could not be stored for some other reason (e.g. -// all writes failed). The text of the error message should -// provide as much detail as possible. +// The returned error, if any, is a KeepError with one of the +// following codes: // +// 500 Collision +// A different block with the same hash already exists on this +// Keep server. +// 422 MD5Fail +// The MD5 hash of the BLOCK does not match the argument HASH. +// 503 Full +// There was not enough space left in any Keep volume to store +// the object. +// 500 Fail +// The object could not be stored for some other reason (e.g. +// all writes failed). The text of the error message should +// provide as much detail as possible. func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) { log := ctxlog.FromContext(ctx) @@ -851,72 +886,88 @@ func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash s return putProgress{}, RequestHashError } - result := newPutResult(wantStorageClasses) + result := newPutProgress(wantStorageClasses) // If we already have this data, it's intact on disk, and we // can update its timestamp, return success. If we have // different data with the same hash, return failure. - if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil { + if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() { return result, err } if ctx.Err() != nil { return result, ErrClientDisconnect } - // Choose a Keep volume to write to. - // If this volume fails, try all of the volumes in order. - if mnt := volmgr.NextWritable(); mnt == nil || !result.Want(mnt) { - // fall through to "try all volumes" below - } else if err := mnt.Put(ctx, hash, block); err != nil { - log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash) - } else { - result.Add(mnt) - if result.Done() { - return result, nil - } - } - if ctx.Err() != nil { - return putProgress{}, ErrClientDisconnect - } - - writables := volmgr.AllWritable() + writables := volmgr.NextWritable() if len(writables) == 0 { log.Error("no writable volumes") - return putProgress{}, FullError + return result, FullError } - allFull := true + var wg sync.WaitGroup + var mtx sync.Mutex + cond := sync.Cond{L: &mtx} + // pending predicts what result will be if all pending writes + // succeed. + pending := result.Copy() + var allFull atomic.Value + allFull.Store(true) + + // We hold the lock for the duration of the "each volume" loop + // below, except when it is released during cond.Wait(). + mtx.Lock() + for _, mnt := range writables { + // Wait until our decision to use this mount does not + // depend on the outcome of pending writes. + for result.Want(mnt) && !pending.Want(mnt) { + cond.Wait() + } if !result.Want(mnt) { continue } - err := mnt.Put(ctx, hash, block) - if ctx.Err() != nil { - return result, ErrClientDisconnect - } - switch err { - case nil: - result.Add(mnt) - if result.Done() { - return result, nil + mnt := mnt + pending.Add(mnt) + wg.Add(1) + go func() { + log.Debugf("PutBlock: start write to %s", mnt.UUID) + defer wg.Done() + err := mnt.Put(ctx, hash, block) + + mtx.Lock() + if err != nil { + log.Debugf("PutBlock: write to %s failed", mnt.UUID) + pending.Sub(mnt) + } else { + log.Debugf("PutBlock: write to %s succeeded", mnt.UUID) + result.Add(mnt) } - continue - case FullError: - continue - default: - // The volume is not full but the - // write did not succeed. Report the - // error and continue trying. - allFull = false - log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash) - } + cond.Broadcast() + mtx.Unlock() + + if err != nil && err != FullError && ctx.Err() == nil { + // The volume is not full but the + // write did not succeed. Report the + // error and continue trying. + allFull.Store(false) + log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash) + } + }() + } + mtx.Unlock() + wg.Wait() + if ctx.Err() != nil { + return result, ErrClientDisconnect + } + if result.Done() { + return result, nil } if result.totalReplication > 0 { // Some, but not all, of the storage classes were // satisfied. This qualifies as success. return result, nil - } else if allFull { + } else if allFull.Load().(bool) { log.Error("all volumes with qualifying storage classes are full") return putProgress{}, FullError } else { diff --git a/services/keepstore/pull_worker_integration_test.go b/services/keepstore/pull_worker_integration_test.go index ad7ec15e72..eb7fe5fd67 100644 --- a/services/keepstore/pull_worker_integration_test.go +++ b/services/keepstore/pull_worker_integration_test.go @@ -58,7 +58,6 @@ func (s *HandlerSuite) TestPullWorkerIntegration_GetNonExistingLocator(c *check. } pullRequest := s.setupPullWorkerIntegrationTest(c, testData, false) - defer arvadostest.StopAPI() defer arvadostest.StopKeep(2) s.performPullWorkerIntegrationTest(testData, pullRequest, c) @@ -76,7 +75,6 @@ func (s *HandlerSuite) TestPullWorkerIntegration_GetExistingLocator(c *check.C) } pullRequest := s.setupPullWorkerIntegrationTest(c, testData, true) - defer arvadostest.StopAPI() defer arvadostest.StopKeep(2) s.performPullWorkerIntegrationTest(testData, pullRequest, c) diff --git a/services/keepstore/volume.go b/services/keepstore/volume.go index 9bfc6ca3e5..3f7c9cb79b 100644 --- a/services/keepstore/volume.go +++ b/services/keepstore/volume.go @@ -344,11 +344,11 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my vm.writables = append(vm.writables, mnt) } } - // pri(i): return highest priority of any storage class - // offered by vm.readables[i] - pri := func(i int) int { + // pri(mnt): return highest priority of any storage class + // offered by mnt + pri := func(mnt *VolumeMount) int { any, best := false, 0 - for class := range vm.readables[i].KeepMount.StorageClasses { + for class := range mnt.KeepMount.StorageClasses { if p := cluster.StorageClasses[class].Priority; !any || best < p { best = p any = true @@ -356,14 +356,20 @@ func makeRRVolumeManager(logger logrus.FieldLogger, cluster *arvados.Cluster, my } return best } - // sort vm.readables, first by highest priority of any offered + // less(a,b): sort first by highest priority of any offered // storage class (highest->lowest), then by volume UUID - sort.Slice(vm.readables, func(i, j int) bool { - if pi, pj := pri(i), pri(j); pi != pj { - return pi > pj + less := func(a, b *VolumeMount) bool { + if pa, pb := pri(a), pri(b); pa != pb { + return pa > pb } else { - return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID + return a.KeepMount.UUID < b.KeepMount.UUID } + } + sort.Slice(vm.readables, func(i, j int) bool { + return less(vm.readables[i], vm.readables[j]) + }) + sort.Slice(vm.writables, func(i, j int) bool { + return less(vm.writables[i], vm.writables[j]) }) return vm, nil } @@ -384,18 +390,22 @@ func (vm *RRVolumeManager) AllReadable() []*VolumeMount { return vm.readables } -// AllWritable returns an array of all writable volumes +// AllWritable returns writable volumes, sorted by priority/uuid. Used +// by CompareAndTouch to ensure higher-priority volumes are checked +// first. func (vm *RRVolumeManager) AllWritable() []*VolumeMount { return vm.writables } -// NextWritable returns the next writable -func (vm *RRVolumeManager) NextWritable() *VolumeMount { +// NextWritable returns writable volumes, rotated by vm.counter so +// each volume gets a turn to be first. Used by PutBlock to distribute +// new data across available volumes. +func (vm *RRVolumeManager) NextWritable() []*VolumeMount { if len(vm.writables) == 0 { return nil } - i := atomic.AddUint32(&vm.counter, 1) - return vm.writables[i%uint32(len(vm.writables))] + offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables) + return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...) } // VolumeStats returns an ioStats for the given volume. diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.base b/tools/arvbox/lib/arvbox/docker/Dockerfile.base index 79f0d3f4f6..c112972c43 100644 --- a/tools/arvbox/lib/arvbox/docker/Dockerfile.base +++ b/tools/arvbox/lib/arvbox/docker/Dockerfile.base @@ -73,7 +73,7 @@ ENV DEBIAN_FRONTEND noninteractive # gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less RUN apt-get update && \ apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \ - gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less && \ + gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less vim-tiny && \ apt-get clean ENV GOPATH /var/lib/gopath diff --git a/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service b/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service index fb3eaaeee8..a112cb93fe 100755 --- a/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service +++ b/tools/arvbox/lib/arvbox/docker/service/workbench2/run-service @@ -59,5 +59,6 @@ fi export VERSION=$(./version-at-commit.sh) export BROWSER=none export CI=true +export HTTPS=false node --version exec node node_modules/react-scripts/scripts/start.js diff --git a/tools/keep-block-check/keep-block-check_test.go b/tools/keep-block-check/keep-block-check_test.go index 9f409e6af0..e6519fb377 100644 --- a/tools/keep-block-check/keep-block-check_test.go +++ b/tools/keep-block-check/keep-block-check_test.go @@ -43,12 +43,7 @@ var TestHash2 = "aaaac516f788aec4f30932ffb6395c39" var blobSignatureTTL = time.Duration(2*7*24) * time.Hour -func (s *ServerRequiredSuite) SetUpSuite(c *C) { - arvadostest.StartAPI() -} - func (s *ServerRequiredSuite) TearDownSuite(c *C) { - arvadostest.StopAPI() arvadostest.ResetEnv() } diff --git a/tools/keep-rsync/keep-rsync_test.go b/tools/keep-rsync/keep-rsync_test.go index 38968ae128..45ed3f67f5 100644 --- a/tools/keep-rsync/keep-rsync_test.go +++ b/tools/keep-rsync/keep-rsync_test.go @@ -43,12 +43,7 @@ var _ = Suite(&DoMainTestSuite{}) type ServerRequiredSuite struct{} -func (s *ServerRequiredSuite) SetUpSuite(c *C) { - arvadostest.StartAPI() -} - func (s *ServerRequiredSuite) TearDownSuite(c *C) { - arvadostest.StopAPI() arvadostest.ResetEnv() } diff --git a/tools/salt-install/Vagrantfile b/tools/salt-install/Vagrantfile index 3019a9fb1c..a3463bfc5c 100644 --- a/tools/salt-install/Vagrantfile +++ b/tools/salt-install/Vagrantfile @@ -35,7 +35,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| cp -vr /vagrant/tests /home/vagrant/tests; sed 's#cluster_fixme_or_this_wont_work#harpo#g; s#domain_fixme_or_this_wont_work#local#g; - s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g; + s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g; s#CONTROLLER_EXT_SSL_PORT=443#CONTROLLER_EXT_SSL_PORT=8443#g' \ /vagrant/local.params.example.single_host_multiple_hostnames > /tmp/local.params.single_host_multiple_hostnames" arv.vm.provision "shell", @@ -78,7 +78,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| cp -vr /vagrant/tests /home/vagrant/tests; sed 's#HOSTNAME_EXT=\"\"#HOSTNAME_EXT=\"zeppo.local\"#g; s#cluster_fixme_or_this_wont_work#zeppo#g; - s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g; + s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g; s#domain_fixme_or_this_wont_work#local#g;' \ /vagrant/local.params.example.single_host_single_hostname > /tmp/local.params.single_host_single_hostname" arv.vm.provision "shell", diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls index 23e0076504..ccf6bac789 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +# vim: ft=yaml --- # Copyright (C) The Arvados Authors. All rights reserved. # @@ -26,6 +28,7 @@ arvados: ## manage OS packages with some other tool and you don't want us messing up ## with your setup. ruby: + ## We set these to `true` here for testing purposes. ## They both default to `false`. manage_ruby: true @@ -67,8 +70,15 @@ arvados: host: 127.0.0.1 password: "__DATABASE_PASSWORD__" user: __CLUSTER___arvados - encoding: en_US.utf8 - client_encoding: UTF8 + extra_conn_params: + client_encoding: UTF8 + # Centos7 does not enable SSL by default, so we disable + # it here just for testing of the formula purposes only. + # You should not do this in production, and should + # configure Postgres certificates correctly + {%- if grains.os_family in ('RedHat',) %} + sslmode: disable + {%- endif %} tls: # certificate: '' @@ -76,6 +86,13 @@ arvados: # required to test with arvados-snakeoil certs insecure: true + resources: + virtual_machines: + shell: + name: webshell + backend: 127.0.1.1 + port: 4200 + ### TOKENS tokens: system_root: __SYSTEM_ROOT_TOKEN__ diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls index b2f12c7739..54087f6d6d 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_api_configuration.sls @@ -3,17 +3,23 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- if grains.os_family in ('RedHat',) %} + {%- set group = 'nginx' %} +{%- else %} + {%- set group = 'www-data' %} +{%- endif %} + ### ARVADOS arvados: config: - group: www-data + group: {{ group }} ### NGINX nginx: ### SITES servers: managed: - arvados_api: + arvados_api.conf: enabled: true overwrite: true config: diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls index 3adf0580a4..195e9af82e 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_controller_configuration.sls @@ -20,7 +20,7 @@ nginx: servers: managed: ### DEFAULT - arvados_controller_default: + arvados_controller_default.conf: enabled: true overwrite: true config: @@ -33,9 +33,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_controller_ssl: + arvados_controller_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: __CLUSTER__.__DOMAIN__ @@ -52,7 +54,8 @@ nginx: - proxy_set_header: 'X-Real-IP $remote_addr' - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for' - proxy_set_header: 'X-External-Client $external_client' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.error.log - client_max_body_size: 128m diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls index 2d8922df9a..91179d4a86 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepproxy_configuration.sls @@ -16,7 +16,7 @@ nginx: servers: managed: ### DEFAULT - arvados_keepproxy_default: + arvados_keepproxy_default.conf: enabled: true overwrite: true config: @@ -29,9 +29,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_keepproxy_ssl: + arvados_keepproxy_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: keep.__CLUSTER__.__DOMAIN__ @@ -52,6 +54,7 @@ nginx: - client_max_body_size: 64M - proxy_http_version: '1.1' - proxy_request_buffering: 'off' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls index d180a3bad4..9ea16bfb54 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_keepweb_configuration.sls @@ -16,7 +16,7 @@ nginx: servers: managed: ### DEFAULT - arvados_collections_download_default: + arvados_collections_download_default.conf: enabled: true overwrite: true config: @@ -30,9 +30,11 @@ nginx: - return: '301 https://$host$request_uri' ### COLLECTIONS / DOWNLOAD - arvados_collections_download_ssl: + arvados_collections_download_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: collections.__CLUSTER__.__DOMAIN__ download.__CLUSTER__.__DOMAIN__ @@ -52,6 +54,7 @@ nginx: - client_max_body_size: 0 - proxy_http_version: '1.1' - proxy_request_buffering: 'off' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls index 6ce75faa70..a4d3c34f26 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls @@ -3,19 +3,69 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- set passenger_pkg = 'nginx-mod-http-passenger' + if grains.osfinger in ('CentOS Linux-7') else + 'libnginx-mod-http-passenger' %} +{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so' + if grains.osfinger in ('CentOS Linux-7',) else + '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %} +{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby' + if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else + '/usr/bin/ruby' %} + ### NGINX nginx: install_from_phusionpassenger: true lookup: - passenger_package: libnginx-mod-http-passenger - passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf + passenger_package: {{ passenger_pkg }} + ### PASSENGER + passenger: + passenger_ruby: {{ passenger_ruby }} ### SERVER server: config: - include: 'modules-enabled/*.conf' + # This is required to get the passenger module loaded + # In Debian it can be done with this + # include: 'modules-enabled/*.conf' + load_module: {{ passenger_mod }} + worker_processes: 4 + ### SNIPPETS + snippets: + # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 + ssl_hardening_default.conf: + - ssl_session_timeout: 1d + - ssl_session_cache: 'shared:arvadosSSL:10m' + - ssl_session_tickets: 'off' + + # intermediate configuration + - ssl_protocols: TLSv1.2 TLSv1.3 + - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384 + - ssl_prefer_server_ciphers: 'off' + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + - add_header: 'Strict-Transport-Security "max-age=63072000" always' + + # OCSP stapling + # FIXME! Stapling does not work with self-signed certificates, so disabling for tests + # - ssl_stapling: 'on' + # - ssl_stapling_verify: 'on' + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates + + # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam + # - ssl_dhparam: /path/to/dhparam + + # replace with the IP address of your resolver + # - resolver: 127.0.0.1 + + arvados-snakeoil.conf: + - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem + - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key + ### SITES servers: managed: diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls index e75f044343..9b73ab4a09 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_webshell_configuration.sls @@ -3,6 +3,20 @@ # # SPDX-License-Identifier: AGPL-3.0 +# This parameter will be used here to generate a list of upstreams and vhosts. +# This dict is here for convenience and should be managed some other way, but the +# different ways of orchestration that can be used for this are outside the scope +# of this formula and their examples. +# These upstreams should match those defined in `arvados:cluster:resources:virtual_machines` +{% set webshell_virtual_machines = { + 'shell': { + 'name': 'webshell', + 'backend': '127.0.1.1', + 'port': 4200, + } +} +%} + ### NGINX nginx: ### SERVER @@ -11,13 +25,20 @@ nginx: ### STREAMS http: - upstream webshell_upstream: - - server: 'shell.internal:4200 fail_timeout=10s' + {%- for vm, params in webshell_virtual_machines.items() %} + {%- set vm_name = params.name | default(vm) %} + {%- set vm_backend = params.backend | default(vm_name) %} + {%- set vm_port = params.port | default(4200) %} + + upstream {{ vm_name }}_upstream: + - server: '{{ vm_backend }}:{{ vm_port }} fail_timeout=10s' + + {%- endfor %} ### SITES servers: managed: - arvados_webshell_default: + arvados_webshell_default.conf: enabled: true overwrite: true config: @@ -30,17 +51,21 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_webshell_ssl: + arvados_webshell_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: webshell.__CLUSTER__.__DOMAIN__ - listen: - __CONTROLLER_EXT_SSL_PORT__ http2 ssl - index: index.html index.htm - - location /shell.__CLUSTER__.__DOMAIN__: - - proxy_pass: 'http://webshell_upstream' + {%- for vm, params in webshell_virtual_machines.items() %} + {%- set vm_name = params.name | default(vm) %} + - location /{{ vm_name }}: + - proxy_pass: 'http://{{ vm_name }}_upstream' - proxy_read_timeout: 90 - proxy_connect_timeout: 90 - proxy_set_header: 'Host $http_host' @@ -67,8 +92,9 @@ nginx: - add_header: "'Access-Control-Allow-Origin' '*'" - add_header: "'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'" - add_header: "'Access-Control-Allow-Headers' 'DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type'" - - - include: 'snippets/arvados-snakeoil.conf' + {%- endfor %} + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls index 3a354ac293..bcd0457c9e 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_websocket_configuration.sls @@ -16,7 +16,7 @@ nginx: servers: managed: ### DEFAULT - arvados_websocket_default: + arvados_websocket_default.conf: enabled: true overwrite: true config: @@ -29,9 +29,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_websocket_ssl: + arvados_websocket_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: ws.__CLUSTER__.__DOMAIN__ @@ -53,6 +55,7 @@ nginx: - client_max_body_size: 64M - proxy_http_version: '1.1' - proxy_request_buffering: 'off' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls index 8fdd553991..44bd16fe3e 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench2_configuration.sls @@ -1,12 +1,18 @@ --- # Copyright (C) The Arvados Authors. All rights reserved. # -# SPDX-License-Identifier: AGPL-3.0 +# SPDX-License-Identifier: Apache-2.0 + +{%- if grains.os_family in ('RedHat',) %} + {%- set group = 'nginx' %} +{%- else %} + {%- set group = 'www-data' %} +{%- endif %} ### ARVADOS arvados: config: - group: www-data + group: {{ group }} ### NGINX nginx: @@ -14,7 +20,7 @@ nginx: servers: managed: ### DEFAULT - arvados_workbench2_default: + arvados_workbench2_default.conf: enabled: true overwrite: true config: @@ -27,9 +33,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_workbench2_ssl: + arvados_workbench2_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: workbench2.__CLUSTER__.__DOMAIN__ @@ -43,6 +51,7 @@ nginx: - return: 503 - location /config.json: - return: {{ "200 '" ~ '{"API_HOST":"__CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__"}' ~ "'" }} - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.error.log diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls index 649af10b6d..6b7ab969f9 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_workbench_configuration.sls @@ -3,10 +3,16 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- if grains.os_family in ('RedHat',) %} + {%- set group = 'nginx' %} +{%- else %} + {%- set group = 'www-data' %} +{%- endif %} + ### ARVADOS arvados: config: - group: www-data + group: {{ group }} ### NGINX nginx: @@ -23,7 +29,7 @@ nginx: servers: managed: ### DEFAULT - arvados_workbench_default: + arvados_workbench_default.conf: enabled: true overwrite: true config: @@ -36,9 +42,11 @@ nginx: - location /: - return: '301 https://$host$request_uri' - arvados_workbench_ssl: + arvados_workbench_ssl.conf: enabled: true overwrite: true + requires: + file: nginx_snippet_arvados-snakeoil.conf config: - server: - server_name: workbench.__CLUSTER__.__DOMAIN__ @@ -54,11 +62,12 @@ nginx: - proxy_set_header: 'Host $http_host' - proxy_set_header: 'X-Real-IP $remote_addr' - proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for' - - include: 'snippets/arvados-snakeoil.conf' + - include: snippets/ssl_hardening_default.conf + - include: snippets/arvados-snakeoil.conf - access_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.access.log combined - error_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.error.log - arvados_workbench_upstream: + arvados_workbench_upstream.conf: enabled: true overwrite: true config: diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls index 71e712cad3..fda1545a05 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls @@ -5,11 +5,29 @@ ### POSTGRESQL postgres: - use_upstream_repo: false + # Centos-7's postgres package is too old, so we need to force using upstream's + # This is not required in Debian's family as they already ship with PG +11 + {%- if salt['grains.get']('os_family') == 'RedHat' %} + use_upstream_repo: true + version: '12' + + pkgs_deps: + - libicu + - libxslt + - systemd-sysv + + pkgs_extra: + - postgresql12-contrib + + {%- else %} pkgs_extra: - postgresql-contrib + {%- endif %} postgresconf: |- listen_addresses = '*' # listen on all interfaces + #ssl = on + #ssl_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' + #ssl_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' acls: - ['local', 'all', 'postgres', 'peer'] - ['local', 'all', 'all', 'peer'] diff --git a/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls b/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls index fb1473def2..91617e4fa4 100644 --- a/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls +++ b/tools/salt-install/config_examples/single_host/multiple_hostnames/states/snakeoil_certs.sls @@ -1,15 +1,22 @@ # Copyright (C) The Arvados Authors. All rights reserved. # -# SPDX-License-Identifier: AGPL-3.0 +# SPDX-License-Identifier: Apache-2.0 {%- set curr_tpldir = tpldir %} {%- set tpldir = 'arvados' %} {%- from "arvados/map.jinja" import arvados with context %} {%- set tpldir = curr_tpldir %} -{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %} +include: + - nginx.passenger + - nginx.config + - nginx.service + +# Debian uses different dirs for certs and keys, but being a Snake Oil example, +# we'll keep it simple here. +{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %} {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %} -{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %} +{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %} {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %} {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %} @@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in - ca-certificates arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run: - # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run + # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run cmd.run: - name: | # These dirs are not to CentOS-ish, but this is a helper script @@ -121,6 +128,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c - require: - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run + # We need this before we can add the nginx's snippet + - require_in: + - file: nginx_snippet_arvados-snakeoil.conf {%- if grains.get('os_family') == 'Debian' %} arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed: @@ -130,29 +140,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal - sls: postgres arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run: - cmd.run: - - name: | - chown root:ssl-cert {{ arvados_key_file }} + file.managed: + - name: {{ arvados_key_file }} + - owner: root + - group: ssl-cert - require: - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed -{%- endif %} - -arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed: - file.managed: - - name: /etc/nginx/snippets/arvados-snakeoil.conf - - contents: | - ssl_certificate {{ arvados_cert_file }}; - ssl_certificate_key {{ arvados_key_file }}; - - watch_in: - - service: nginx_service - - require: - - pkg: passenger_install - - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run - require_in: - - file: nginx_config - - service: nginx_service - - watch_in: - - service: nginx_service - - + - file: nginx_snippet_arvados-snakeoil.conf +{%- endif %} diff --git a/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls b/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls index 6ce75faa70..a4d3c34f26 100644 --- a/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls +++ b/tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls @@ -3,19 +3,69 @@ # # SPDX-License-Identifier: AGPL-3.0 +{%- set passenger_pkg = 'nginx-mod-http-passenger' + if grains.osfinger in ('CentOS Linux-7') else + 'libnginx-mod-http-passenger' %} +{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so' + if grains.osfinger in ('CentOS Linux-7',) else + '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %} +{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby' + if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else + '/usr/bin/ruby' %} + ### NGINX nginx: install_from_phusionpassenger: true lookup: - passenger_package: libnginx-mod-http-passenger - passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf + passenger_package: {{ passenger_pkg }} + ### PASSENGER + passenger: + passenger_ruby: {{ passenger_ruby }} ### SERVER server: config: - include: 'modules-enabled/*.conf' + # This is required to get the passenger module loaded + # In Debian it can be done with this + # include: 'modules-enabled/*.conf' + load_module: {{ passenger_mod }} + worker_processes: 4 + ### SNIPPETS + snippets: + # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 + ssl_hardening_default.conf: + - ssl_session_timeout: 1d + - ssl_session_cache: 'shared:arvadosSSL:10m' + - ssl_session_tickets: 'off' + + # intermediate configuration + - ssl_protocols: TLSv1.2 TLSv1.3 + - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384 + - ssl_prefer_server_ciphers: 'off' + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + - add_header: 'Strict-Transport-Security "max-age=63072000" always' + + # OCSP stapling + # FIXME! Stapling does not work with self-signed certificates, so disabling for tests + # - ssl_stapling: 'on' + # - ssl_stapling_verify: 'on' + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates + + # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam + # - ssl_dhparam: /path/to/dhparam + + # replace with the IP address of your resolver + # - resolver: 127.0.0.1 + + arvados-snakeoil.conf: + - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem + - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key + ### SITES servers: managed: diff --git a/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls b/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls index 130fb5e937..b6929fb887 100644 --- a/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls +++ b/tools/salt-install/config_examples/single_host/single_hostname/states/snakeoil_certs.sls @@ -1,15 +1,22 @@ # Copyright (C) The Arvados Authors. All rights reserved. # -# SPDX-License-Identifier: AGPL-3.0 +# SPDX-License-Identifier: Apache-2.0 {%- set curr_tpldir = tpldir %} {%- set tpldir = 'arvados' %} {%- from "arvados/map.jinja" import arvados with context %} {%- set tpldir = curr_tpldir %} -{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %} +include: + - nginx.passenger + - nginx.config + - nginx.service + +# Debian uses different dirs for certs and keys, but being a Snake Oil example, +# we'll keep it simple here. +{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %} {%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %} -{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %} +{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %} {%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %} {%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %} @@ -30,7 +37,7 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_in - ca-certificates arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run: - # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run + # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run cmd.run: - name: | # These dirs are not to CentOS-ish, but this is a helper script @@ -124,6 +131,9 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_c - require: - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run + # We need this before we can add the nginx's snippet + - require_in: + - file: nginx_snippet_arvados-snakeoil.conf {%- if grains.get('os_family') == 'Debian' %} arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed: @@ -133,26 +143,13 @@ arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_instal - sls: postgres arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run: - cmd.run: - - name: | - chown root:ssl-cert {{ arvados_key_file }} + file.managed: + - name: {{ arvados_key_file }} + - owner: root + - group: ssl-cert - require: - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run - pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed -{%- endif %} - -arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed: - file.managed: - - name: /etc/nginx/snippets/arvados-snakeoil.conf - - contents: | - ssl_certificate {{ arvados_cert_file }}; - ssl_certificate_key {{ arvados_key_file }}; - - require: - - pkg: nginx_install - require_in: - - file: nginx_config - - service: nginx_service - - watch_in: - - service: nginx_service - - + - file: nginx_snippet_arvados-snakeoil.conf +{%- endif %} diff --git a/tools/salt-install/local.params.example.multiple_hosts b/tools/salt-install/local.params.example.multiple_hosts index 17b7b88884..283c631ec5 100644 --- a/tools/salt-install/local.params.example.multiple_hosts +++ b/tools/salt-install/local.params.example.multiple_hosts @@ -100,6 +100,6 @@ RELEASE="production" # ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" -# DOCKER_TAG="v1.0.0" +# DOCKER_TAG="v2.0.7" # LOCALE_TAG="v0.3.4" # LETSENCRYPT_TAG="v2.1.0" diff --git a/tools/salt-install/local.params.example.single_host_multiple_hostnames b/tools/salt-install/local.params.example.single_host_multiple_hostnames index ae54e7437a..e23634e8c4 100644 --- a/tools/salt-install/local.params.example.single_host_multiple_hostnames +++ b/tools/salt-install/local.params.example.single_host_multiple_hostnames @@ -72,6 +72,6 @@ RELEASE="production" # ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" -# DOCKER_TAG="v1.0.0" +# DOCKER_TAG="v2.0.7" # LOCALE_TAG="v0.3.4" # LETSENCRYPT_TAG="v2.1.0" diff --git a/tools/salt-install/local.params.example.single_host_single_hostname b/tools/salt-install/local.params.example.single_host_single_hostname index a35bd45bff..ae9804863f 100644 --- a/tools/salt-install/local.params.example.single_host_single_hostname +++ b/tools/salt-install/local.params.example.single_host_single_hostname @@ -81,6 +81,6 @@ RELEASE="production" # ARVADOS_TAG="2.2.0" # POSTGRES_TAG="v0.41.6" # NGINX_TAG="temp-fix-missing-statements-in-pillar" -# DOCKER_TAG="v1.0.0" +# DOCKER_TAG="v2.0.7" # LOCALE_TAG="v0.3.4" # LETSENCRYPT_TAG="v2.1.0" diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh index 7ac120e5fd..b840d86c6f 100755 --- a/tools/salt-install/provision.sh +++ b/tools/salt-install/provision.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/bin/bash # Copyright (C) The Arvados Authors. All rights reserved. # @@ -11,6 +11,7 @@ # vagrant up set -o pipefail +set -x # capture the directory that the script is running from SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" @@ -177,7 +178,7 @@ VERSION="latest" # Other formula versions we depend on POSTGRES_TAG="v0.41.6" NGINX_TAG="temp-fix-missing-statements-in-pillar" -DOCKER_TAG="v1.0.0" +DOCKER_TAG="v2.0.7" LOCALE_TAG="v0.3.4" LETSENCRYPT_TAG="v2.1.0" @@ -232,8 +233,23 @@ fi if [ "${DUMP_CONFIG}" = "yes" ]; then echo "The provision installer will just dump a config under ${DUMP_SALT_CONFIG_DIR} and exit" else - apt-get update - apt-get install -y curl git jq + # Install a few dependency packages + # First, let's figure out the OS we're working on + OS_ID=$(grep ^ID= /etc/os-release |cut -f 2 -d= |cut -f 2 -d \") + echo "Detected distro: ${OS_ID}" + + case ${OS_ID} in + "centos") + echo "WARNING! Disabling SELinux, see https://dev.arvados.org/issues/18019" + sed -i 's/SELINUX=enforcing/SELINUX=permissive' /etc/sysconfig/selinux + setenforce permissive + yum install -y curl git jq + ;; + "debian"|"ubuntu") + DEBIAN_FRONTEND=noninteractive apt update + DEBIAN_FRONTEND=noninteractive apt install -y curl git jq + ;; + esac if which salt-call; then echo "Salt already installed" @@ -246,6 +262,8 @@ else # Set salt to masterless mode cat > /etc/salt/minion << EOFSM +failhard: "True" + file_client: local file_roots: base: @@ -607,5 +625,10 @@ fi # Test that the installation finished correctly if [ "x${TEST}" = "xyes" ]; then cd ${T_DIR} - ./run-test.sh + # If we use RVM, we need to run this with it, or most ruby commands will fail + RVM_EXEC="" + if [ -x /usr/local/rvm/bin/rvm-exec ]; then + RVM_EXEC="/usr/local/rvm/bin/rvm-exec" + fi + ${RVM_EXEC} ./run-test.sh fi diff --git a/tools/salt-install/tests/run-test.sh b/tools/salt-install/tests/run-test.sh index 53c51a2c5a..020efa94e8 100755 --- a/tools/salt-install/tests/run-test.sh +++ b/tools/salt-install/tests/run-test.sh @@ -55,13 +55,17 @@ echo "Activating user '__INITIAL_USER__'" arv user update --uuid "${user_uuid}" --user '{"is_active": true}' echo "Getting the user API TOKEN" -user_api_token=$(arv api_client_authorization list --filters "[[\"owner_uuid\", \"=\", \"${user_uuid}\"],[\"kind\", \"==\", \"arvados#apiClientAuthorization\"]]" --limit=1 |jq -r .items[].api_token) +user_api_token=$(arv api_client_authorization list | jq -r ".items[] | select( .owner_uuid == \"${user_uuid}\" ).api_token" | head -1) if [ "x${user_api_token}" = "x" ]; then + echo "No existing token found for user '__INITIAL_USER__' (user_uuid: '${user_uuid}'). Creating token" user_api_token=$(arv api_client_authorization create --api-client-authorization "{\"owner_uuid\": \"${user_uuid}\"}" | jq -r .api_token) fi +echo "API TOKEN FOR user '__INITIAL_USER__': '${user_api_token}'." + # Change to the user's token and run the workflow +echo "Switching to user '__INITIAL_USER__'" export ARVADOS_API_TOKEN="${user_api_token}" echo "Running test CWL workflow" diff --git a/tools/test-collection-create/test-collection-create.py b/tools/test-collection-create/test-collection-create.py new file mode 100644 index 0000000000..9a02745694 --- /dev/null +++ b/tools/test-collection-create/test-collection-create.py @@ -0,0 +1,443 @@ +#!/usr/bin/env python3 +# +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: CC-BY-SA-3.0 + +import argparse +import logging +import random +import string +import sys + +import arvados +import arvados.collection + +logger = logging.getLogger('arvados.test_collection_create') +logger.setLevel(logging.INFO) + +opts = argparse.ArgumentParser(add_help=False) +opts.add_argument('--min-files', type=int, default=30000, help=""" +Minimum number of files on each directory. Default: 30000. +""") +opts.add_argument('--max-files', type=int, default=30000, help=""" +Maximum number of files on each directory. Default: 30000. +""") +opts.add_argument('--min-depth', type=int, default=0, help=""" +Minimum depth for the created tree structure. Default: 0. +""") +opts.add_argument('--max-depth', type=int, default=0, help=""" +Maximum depth for the created tree structure. Default: 0. +""") +opts.add_argument('--min-subdirs', type=int, default=1, help=""" +Minimum number of subdirectories created at every depth level. Default: 1. +""") +opts.add_argument('--max-subdirs', type=int, default=10, help=""" +Maximum number of subdirectories created at every depth level. Default: 10. +""") +opts.add_argument('--debug', action='store_true', default=False, help=""" +Sets logging level to DEBUG. +""") + +arg_parser = argparse.ArgumentParser( + description='Create a collection with garbage data for testing purposes.', + parents=[opts]) + +adjectives = ['abandoned','able','absolute','adorable','adventurous','academic', + 'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic', + 'active','actual','adept','admirable','admired','adolescent','adorable','adored', + 'advanced','afraid','affectionate','aged','aggravating','aggressive','agile', + 'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated', + 'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored', + 'ancient','angelic','angry','anguished','animated','annual','another','antique', + 'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic', + 'ashamed','assured','astonishing','athletic','attached','attentive','attractive', + 'austere','authentic','authorized','automatic','avaricious','average','aware','awesome', + 'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful', + 'belated','beloved','beneficial','better','best','bewitched','big','big-hearted', + 'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank', + 'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling', + 'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable', + 'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly', + 'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing', + 'calculating','calm','candid','canine','capital','carefree','careful','careless', + 'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery', + 'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever', + 'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful', + 'colorless','colossal','comfortable','common','compassionate','competent','complete', + 'complex','complicated','composed','concerned','concrete','confused','conscious', + 'considerate','constant','content','conventional','cooked','cool','cooperative', + 'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy', + 'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded', + 'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy', + 'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark', + 'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive', + 'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed', + 'delectable','delicious','delightful','delirious','demanding','dense','dental', + 'dependable','dependent','descriptive','deserted','detailed','determined','devoted', + 'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct', + 'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant', + 'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct', + 'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic', + 'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early', + 'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated', + 'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished', + 'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened', + 'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed', + 'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted', + 'excellent','exemplary','exhausted','excitable','excited','exciting','exotic', + 'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small', + 'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous', + 'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal', + 'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female', + 'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand', + 'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering', + 'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy', + 'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant', + 'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened', + 'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen', + 'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan', + 'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted', + 'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering', + 'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous', + 'graceful','gracious','grand','grandiose','granular','grateful','grave','gray', + 'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross', + 'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome', + 'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy', + 'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious', + 'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly', + 'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious', + 'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible', + 'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry', + 'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic', + 'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate', + 'illustrious','imaginary','imaginative','immaculate','immaterial','immediate', + 'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish', + 'impolite','important','impossible','impractical','impressionable','impressive', + 'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential', + 'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated', + 'inferior','infinite','informal','innocent','insecure','insidious','insignificant', + 'insistent','instructive','insubstantial','intelligent','intent','intentional', + 'interesting','internal','international','intrepid','ironclad','irresponsible', + 'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery', + 'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo', + 'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly', + 'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame', + 'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean', + 'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited', + 'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome', + 'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely', + 'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious', + 'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous', + 'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical', + 'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy', + 'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly', + 'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental', + 'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored', + 'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural', + 'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible', + 'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy', + 'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious', + 'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd', + 'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal', + 'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary', + 'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding', + 'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry', + 'parallel','parched','partial','passionate','past','pastel','peaceful','peppery', + 'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic', + 'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic', + 'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political', + 'pointed','pointless','poised','poor','popular','portly','posh','positive','possible', + 'potable','powerful','powerless','practical','precious','present','prestigious', + 'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private', + 'prize','probable','productive','profitable','profuse','proper','proud','prudent', + 'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling', + 'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable', + 'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical', + 'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular', + 'ready','real','realistic','reasonable','red','reflecting','regal','regular', + 'reliable','relieved','remarkable','remorseful','remote','repentant','required', + 'respectful','responsible','repulsive','revolving','rewarding','rich','rigid', + 'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough', + 'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty', + 'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied', + 'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful', + 'scratchy','scrawny','second','secondary','second-hand','secret','self-assured', + 'self-reliant','selfish','sentimental','separate','serene','serious','serpentine', + 'several','severe','shabby','shadowy','shady','shallow','shameful','shameless', + 'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term', + 'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple', + 'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight', + 'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug', + 'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid', + 'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy', + 'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy', + 'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square', + 'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark', + 'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange', + 'steel','strict','strident','striking','striped','strong','studious','stunning', + 'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle', + 'suburban','sudden','sugary','sunny','super','superb','superficial','superior', + 'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering', + 'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty', + 'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible', + 'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this', + 'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy', + 'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured', + 'tremendous','tragic','trained','tremendous','triangular','tricky','trifling','trim', + 'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby', + 'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable', + 'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit', + 'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique', + 'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant', + 'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy', + 'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling', + 'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful', + 'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid', + 'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious', + 'victorious','vigilant','vigorous','villainous','violet','violent','virtual', + 'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm', + 'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy', + 'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome', + 'well-documented','well-groomed','well-informed','well-lit','well-made','well-off', + 'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white', + 'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted', + 'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful', + 'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst', + 'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry', + 'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous', + 'zesty','zigzag'] +nouns = ['people','history','way','art','world','information','map','two','family', + 'government','health','system','computer','meat','year','thanks','music','person', + 'reading','method','data','food','understanding','theory','law','bird','literature', + 'problem','software','control','knowledge','power','ability','economics','love', + 'internet','television','science','library','nature','fact','product','idea', + 'temperature','investment','area','society','activity','story','industry','media', + 'thing','oven','community','definition','safety','quality','development','language', + 'management','player','variety','video','week','security','country','exam','movie', + 'organization','equipment','physics','analysis','policy','series','thought','basis', + 'boyfriend','direction','strategy','technology','army','camera','freedom','paper', + 'environment','child','instance','month','truth','marketing','university','writing', + 'article','department','difference','goal','news','audience','fishing','growth', + 'income','marriage','user','combination','failure','meaning','medicine','philosophy', + 'teacher','communication','night','chemistry','disease','disk','energy','nation', + 'road','role','soup','advertising','location','success','addition','apartment','education', + 'math','moment','painting','politics','attention','decision','event','property', + 'shopping','student','wood','competition','distribution','entertainment','office', + 'population','president','unit','category','cigarette','context','introduction', + 'opportunity','performance','driver','flight','length','magazine','newspaper', + 'relationship','teaching','cell','dealer','finding','lake','member','message','phone', + 'scene','appearance','association','concept','customer','death','discussion','housing', + 'inflation','insurance','mood','woman','advice','blood','effort','expression','importance', + 'opinion','payment','reality','responsibility','situation','skill','statement','wealth', + 'application','city','county','depth','estate','foundation','grandmother','heart', + 'perspective','photo','recipe','studio','topic','collection','depression','imagination', + 'passion','percentage','resource','setting','ad','agency','college','connection', + 'criticism','debt','description','memory','patience','secretary','solution','administration', + 'aspect','attitude','director','personality','psychology','recommendation','response', + 'selection','storage','version','alcohol','argument','complaint','contract','emphasis', + 'highway','loss','membership','possession','preparation','steak','union','agreement', + 'cancer','currency','employment','engineering','entry','interaction','mixture','preference', + 'region','republic','tradition','virus','actor','classroom','delivery','device', + 'difficulty','drama','election','engine','football','guidance','hotel','owner', + 'priority','protection','suggestion','tension','variation','anxiety','atmosphere', + 'awareness','bath','bread','candidate','climate','comparison','confusion','construction', + 'elevator','emotion','employee','employer','guest','height','leadership','mall','manager', + 'operation','recording','sample','transportation','charity','cousin','disaster','editor', + 'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome', + 'permission','presentation','promotion','reflection','refrigerator','resolution','revenue', + 'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee', + 'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud', + 'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant', + 'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife', + 'accident','airport','appointment','arrival','assumption','baseball','chapter','committee', + 'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall', + 'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie', + 'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea', + 'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate', + 'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination', + 'garbage','grocery','honey','impression','improvement','independence','insect','inspection', + 'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity', + 'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair', + 'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday', + 'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear', + 'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady', + 'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution', + 'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat', + 'trainer','uncle','youth','time','work','film','water','money','example','while','business','study', + 'game','life','form','air','day','place','number','part','field','fish','back','process','heat', + 'hand','experience','job','book','end','point','type','home','economy','value','body','market', + 'guide','interest','state','radio','course','company','price','size','card','list','mind','trade', + 'line','care','group','risk','word','fat','force','key','light','training','name','school','top', + 'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun', + 'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture', + 'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause', + 'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class', + 'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance', + 'figure','man','model','source','beginning','earth','program','chicken','design','feature','head', + 'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun', + 'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard', + 'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame', + 'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view', + 'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift', + 'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot', + 'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square', + 'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot', + 'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot', + 'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence', + 'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar', + 'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer', + 'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign', + 'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park', + 'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit', + 'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart', + 'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season', + 'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path', + 'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast', + 'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance', + 'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance', + 'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew', + 'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex', + 'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat', + 'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole', + 'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile', + 'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle', + 'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency', + 'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband', + 'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof', + 'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation', + 'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother', + 'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf', + 'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent', + 'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank', + 'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar', + 'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage', + 'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise', + 'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle', + 'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer', + 'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow', + 'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug', + 'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer', + 'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip', + 'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring', + 'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you', + 'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great', + 'being','few','might','still','public','read','keep','start','give','human','local','general','she', + 'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big', + 'possible','particular','today','major','personal','current','national','cut','natural','physical', + 'show','try','check','second','call','move','pay','let','increase','single','individual','turn', + 'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook', + 'alternative','following','special','working','whole','dance','excuse','cold','commercial','low', + 'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present', + 'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run', + 'complex','due','effective','middle','regular','reserve','independent','leave','original','reach', + 'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit', + 'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique', + 'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar', + 'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy', + 'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish', + 'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow', + 'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat', + 'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse', + 'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark', + 'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack', + 'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird', + 'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing', + 'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt', + 'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape', + 'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt', + 'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain', + 'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny', + 'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect', + 'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand', + 'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal', + 'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry', + 'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip', + 'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap', + 'yesterday'] + +def get_random_name(with_ext=True): + return "{}_{}_{}{}".format( + random.choice(adjectives), + random.choice(nouns), + random.randint(0, 50000), + with_ext and '.txt' or '') + +def get_random_file(max_filesize): + file_start = random.randint(0, (max_filesize - 1025)) + file_size = random.randint(0, (max_filesize - file_start)) + file_name = get_random_name() + return "{}:{}:{}".format(file_start, file_size, file_name) + +def get_stream(name, max_filesize, data_loc, args): + files = [] + for _ in range(random.randint(args.min_files, args.max_files)): + files.append(get_random_file(max_filesize)) + stream = "{} {} {}".format(name, data_loc, ' '.join(files)) + return stream + +def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0): + current_stream = get_stream(base_stream_name, max_filesize, data_loc, args) + current_size += len(current_stream) + streams = [current_stream] + + if current_size >= (128 * 1024 * 1024): + logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name)) + elif depth == 0: + logger.debug("Finished stream {}".format(base_stream_name)) + else: + for _ in range(random.randint(args.min_subdirs, args.max_subdirs)): + stream_name = base_stream_name+'/'+get_random_name(False) + substreams = create_substreams(depth-1, stream_name, max_filesize, + data_loc, args, current_size) + current_size += sum([len(x) for x in substreams]) + if current_size >= (128 * 1024 * 1024): + break + streams.extend(substreams) + return streams + +def parse_arguments(arguments): + args = arg_parser.parse_args(arguments) + if args.debug: + logger.setLevel(logging.DEBUG) + if args.max_files < args.min_files: + arg_parser.error("--min-files={} should be less or equal than max-files={}".format(args.min_files, args.max_files)) + if args.min_depth < 0: + arg_parser.error("--min-depth should be at least 0") + if args.max_depth < 0 or args.max_depth < args.min_depth: + arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth)) + if args.max_subdirs < args.min_subdirs: + arg_parser.error("--min-subdirs={} should be less or equal than max-subdirs={}".format(args.min_subdirs, args.max_subdirs)) + return args + +def main(arguments=None): + args = parse_arguments(arguments) + logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs)) + api = arvados.api('v1', timeout=5*60) + max_filesize = 1024*1024 + data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)]) + data_loc = arvados.KeepClient(api).put(data_block) + streams = create_substreams(random.randint(args.min_depth, args.max_depth), + '.', max_filesize, data_loc, args) + manifest = '' + for s in streams: + if len(manifest)+len(s) > (1024*1024*128)-2: + logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0])) + break + manifest += s + '\n' + try: + coll_name = get_random_name(False) + coll = api.collections().create( + body={"collection": { + "name": coll_name, + "manifest_text": manifest + }, + }).execute() + except: + logger.info("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest))) + raise + logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest))) + return 0 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file