[
['foo', 10, 25,
['/pipeline_instances/zzzzz-d1hrv-1xfj6xkicf2muk2',
- '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+ '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
'/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7'],
['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
'/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
'/container_requests/zzzzz-xvhdp-cr4completedcr2']],
['pipeline_with_tagged_collection_input', 1, 1,
['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3'],
- ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+ ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
'/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
'/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
'/container_requests/zzzzz-xvhdp-cr4completedcr2']],
['no_such_match', 0, 0,
[],
- ['/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+ ['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
'/jobs/zzzzz-8i9sb-pshmckwoma9plh7',
'/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
'/container_requests/zzzzz-xvhdp-cr4completedcr2']],
].each do |search_filter, expected_min, expected_max, expected, not_expected|
test "all_processes page for search filter '#{search_filter}'" do
- work_units_index(filters: [['any','@@', search_filter]], show_children: true)
+ work_units_index(filters: [['any','ilike', "%#{search_filter}%"]], show_children: true)
assert_response :success
# Verify that expected number of processes are found
[[true, 25, 100,
['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
- '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+ '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
'/jobs/zzzzz-8i9sb-grx15v5mjnsyxk7',
'/jobs/zzzzz-8i9sb-n7omg50bvt0m1nf',
'/container_requests/zzzzz-xvhdp-cr4completedcr2',
'/container_requests/zzzzz-xvhdp-oneof60crs00001']],
[false, 25, 100,
['/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk3',
- '/pipeline_instances/zzzzz-d1hrv-jobspeccomponts',
+ '/pipeline_instances/zzzzz-d1hrv-1yfj61234abcdk4',
'/container_requests/zzzzz-xvhdp-cr4completedcr2'],
['/pipeline_instances/zzzzz-d1hrv-scarxiyajtshq3l',
'/container_requests/zzzzz-xvhdp-oneof60crs00001',
echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$"
exit 1
else
+ [[ "$2" =~ (.*)-(.*) ]]
ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}"
ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}"
fi
"Upgrading from 2.2.0":#v2_2_0
+h3. Removed deprecated '@@' search operator
+
+The '@@' full text search operator, previously deprecated, has been removed. To perform a string search across multiple columns, use the 'ilike' operator on 'any' column as described in the "available list method filter section":{{site.baseurl}}/api/methods.html#substringsearchfilter of the API documentation.
+
h3. Storage classes must be defined explicitly
If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details.
|1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@|
|2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@|
-The following operators are available.[1]
+The following operators are available.
table(table table-bordered table-condensed).
|_. Operator|_. Operand type|_. Description|_. Example|
-|@=@, @!=@|string, number, timestamp, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ @["tail_uuid","!=",null]@|
+|@=@, @!=@, @<>@|string, number, timestamp, JSON-encoded array, JSON-encoded object, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@
+@["tail_uuid","!=",null]@
+@["storage_classes_desired","=","[\"default\"]"]@|
|@<@, @<=@, @>=@, @>@|string, number, or timestamp|Ordering comparison|@["script_version",">","123"]@|
|@like@, @ilike@|string|SQL pattern match. Single character match is @_@ and wildcard is @%@. The @ilike@ operator is case-insensitive|@["script_version","like","d00220fb%"]@|
|@in@, @not in@|array of strings|Set membership|@["script_version","in",["main","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@|
|@is_a@|string|Arvados object type|@["head_uuid","is_a","arvados#collection"]@|
-|@exists@|string|Test if a subproperty is present.|@["properties","exists","my_subproperty"]@|
+|@exists@|string|Presence of subproperty|@["properties","exists","my_subproperty"]@|
+|@contains@|string, array of strings|Presence of one or more keys or array elements|@["storage_classes_desired", "contains", ["foo", "bar"]]@ (matches both @["foo", "bar"]@ and @["foo", "bar", "baz"]@)
+(note @[..., "contains", "foo"]@ is also accepted, and is equivalent to @[..., "contains", ["foo"]]@)|
h4(#substringsearchfilter). Filtering using substring search
|@like@, @ilike@|string|SQL pattern match, single character match is @_@ and wildcard is @%@, ilike is case-insensitive|@["properties.my_subproperty", "like", "d00220fb%"]@|
|@in@, @not in@|array of strings|Set membership|@["properties.my_subproperty", "in", ["fizz", "buzz"]]@|
|@exists@|boolean|Test if a subproperty is present or not (determined by operand).|@["properties.my_subproperty", "exists", true]@|
-|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.|
+|@contains@|string, number|Filter where subproperty has a value either by exact match or value is element of subproperty list.|@["properties.foo", "contains", "bar"]@ will find both @{"foo": "bar"}@ and @{"foo": ["bar", "baz"]}@.|
Note that exclusion filters @!=@ and @not in@ will return records for which the property is not defined at all. To restrict filtering to records on which the subproperty is defined, combine with an @exists@ filter.
|_. Argument |_. Type |_. Description |_. Location |
{background:#ccffcc}.|uuid|string|The UUID of the resource in question.|path||
|{resource_type}|object||query||
-
-fn1^. NOTE: The filter operator for full-text search (@@) which previously worked (but was undocumented) is deprecated and will be removed in a future release.
|manifest_text|text|||
|replication_desired|number|Minimum storage replication level desired for each data block referenced by this collection. A value of @null@ signifies that the site default replication level (typically 2) is desired.|@2@|
|replication_confirmed|number|Replication level most recently confirmed by the storage system. This field is null when a collection is first created, and is reset to null when the manifest_text changes in a way that introduces a new data block. An integer value indicates the replication level of the _least replicated_ data block in the collection.|@2@, null|
-|replication_confirmed_at|datetime|When replication_confirmed was confirmed. If replication_confirmed is null, this field is also null.||
+|replication_confirmed_at|datetime|When @replication_confirmed@ was confirmed. If @replication_confirmed@ is null, this field is also null.||
+|storage_classes_desired|list|An optional list of storage class names where the blocks should be saved. If not provided, the cluster's default storage class(es) will be set.|@['archival']@|
+|storage_classes_confirmed|list|Storage classes most recently confirmed by the storage system. This field is an empty list when a collection is first created.|@['archival']@, @[]@|
+|storage_classes_confirmed_at|datetime|When @storage_classes_confirmed@ was confirmed. If @storage_classes_confirmed@ is @[]@, this field is null.||
|trash_at|datetime|If @trash_at@ is non-null and in the past, this collection will be hidden from API calls. May be untrashed.||
|delete_at|datetime|If @delete_at@ is non-null and in the past, the collection may be permanently deleted.||
|is_trashed|boolean|True if @trash_at@ is in the past, false if not.||
<notextile>
<pre> Containers:
LSF:
- <code class="userinput">BsubArgumentsList: <b>["-C", "0"]</b></code>
+ <code class="userinput">BsubArgumentsList: <b>["-C", "0", "-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"]</b></code>
</pre>
</notextile>
+Note that the default value for @BsubArgumentsList@ uses the @-o@ and @-e@ arguments to write stdout/stderr data to files in @/tmp@ on the compute nodes, which is helpful for troubleshooting installation/configuration problems. Ensure you have something in place to delete old files from @/tmp@, or adjust these arguments accordingly.
+
h3(#PollPeriod). Containers.PollInterval
LSF:
# Additional arguments to bsub when submitting Arvados
# containers as LSF jobs.
- BsubArgumentsList: []
+ #
+ # Note that the default arguments cause LSF to write two files
+ # in /tmp on the compute node each time an Arvados container
+ # runs. Ensure you have something in place to delete old files
+ # from /tmp, or adjust these arguments accordingly.
+ BsubArgumentsList: ["-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"]
# Use sudo to switch to this user account when submitting LSF
# jobs.
"Collections.PreserveVersionIfIdle": true,
"Collections.S3FolderObjects": true,
"Collections.TrashSweepInterval": false,
- "Collections.TrustAllContent": false,
+ "Collections.TrustAllContent": true,
"Collections.WebDAVCache": false,
"Collections.KeepproxyPermission": false,
"Collections.WebDAVPermission": false,
LSF:
# Additional arguments to bsub when submitting Arvados
# containers as LSF jobs.
- BsubArgumentsList: []
+ #
+ # Note that the default arguments cause LSF to write two files
+ # in /tmp on the compute node each time an Arvados container
+ # runs. Ensure you have something in place to delete old files
+ # from /tmp, or adjust these arguments accordingly.
+ BsubArgumentsList: ["-o", "/tmp/crunch-run.%J.out", "-e", "/tmp/crunch-run.%J.err"]
# Use sudo to switch to this user account when submitting LSF
# jobs.
+++ /dev/null
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package controller
-
-import (
- "bufio"
- "bytes"
- "context"
- "crypto/md5"
- "encoding/json"
- "fmt"
- "io"
- "io/ioutil"
- "net/http"
- "strings"
- "sync"
-
- "git.arvados.org/arvados.git/sdk/go/arvados"
- "git.arvados.org/arvados.git/sdk/go/httpserver"
- "git.arvados.org/arvados.git/sdk/go/keepclient"
-)
-
-func rewriteSignatures(clusterID string, expectHash string,
- resp *http.Response, requestError error) (newResponse *http.Response, err error) {
-
- if requestError != nil {
- return resp, requestError
- }
-
- if resp.StatusCode != http.StatusOK {
- return resp, nil
- }
-
- originalBody := resp.Body
- defer originalBody.Close()
-
- var col arvados.Collection
- err = json.NewDecoder(resp.Body).Decode(&col)
- if err != nil {
- return nil, err
- }
-
- // rewriting signatures will make manifest text 5-10% bigger so calculate
- // capacity accordingly
- updatedManifest := bytes.NewBuffer(make([]byte, 0, int(float64(len(col.ManifestText))*1.1)))
-
- hasher := md5.New()
- mw := io.MultiWriter(hasher, updatedManifest)
- sz := 0
-
- scanner := bufio.NewScanner(strings.NewReader(col.ManifestText))
- scanner.Buffer(make([]byte, 1048576), len(col.ManifestText))
- for scanner.Scan() {
- line := scanner.Text()
- tokens := strings.Split(line, " ")
- if len(tokens) < 3 {
- return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
- }
-
- n, err := mw.Write([]byte(tokens[0]))
- if err != nil {
- return nil, fmt.Errorf("Error updating manifest: %v", err)
- }
- sz += n
- for _, token := range tokens[1:] {
- n, err = mw.Write([]byte(" "))
- if err != nil {
- return nil, fmt.Errorf("Error updating manifest: %v", err)
- }
- sz += n
-
- m := keepclient.SignedLocatorRe.FindStringSubmatch(token)
- if m != nil {
- // Rewrite the block signature to be a remote signature
- _, err = fmt.Fprintf(updatedManifest, "%s%s%s+R%s-%s%s", m[1], m[2], m[3], clusterID, m[5][2:], m[8])
- if err != nil {
- return nil, fmt.Errorf("Error updating manifest: %v", err)
- }
-
- // for hash checking, ignore signatures
- n, err = fmt.Fprintf(hasher, "%s%s", m[1], m[2])
- if err != nil {
- return nil, fmt.Errorf("Error updating manifest: %v", err)
- }
- sz += n
- } else {
- n, err = mw.Write([]byte(token))
- if err != nil {
- return nil, fmt.Errorf("Error updating manifest: %v", err)
- }
- sz += n
- }
- }
- n, err = mw.Write([]byte("\n"))
- if err != nil {
- return nil, fmt.Errorf("Error updating manifest: %v", err)
- }
- sz += n
- }
-
- // Check that expected hash is consistent with
- // portable_data_hash field of the returned record
- if expectHash == "" {
- expectHash = col.PortableDataHash
- } else if expectHash != col.PortableDataHash {
- return nil, fmt.Errorf("portable_data_hash %q on returned record did not match expected hash %q ", expectHash, col.PortableDataHash)
- }
-
- // Certify that the computed hash of the manifest_text matches our expectation
- sum := hasher.Sum(nil)
- computedHash := fmt.Sprintf("%x+%v", sum, sz)
- if computedHash != expectHash {
- return nil, fmt.Errorf("Computed manifest_text hash %q did not match expected hash %q", computedHash, expectHash)
- }
-
- col.ManifestText = updatedManifest.String()
-
- newbody, err := json.Marshal(col)
- if err != nil {
- return nil, err
- }
-
- buf := bytes.NewBuffer(newbody)
- resp.Body = ioutil.NopCloser(buf)
- resp.ContentLength = int64(buf.Len())
- resp.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len()))
-
- return resp, nil
-}
-
-func filterLocalClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
- if requestError != nil {
- return resp, requestError
- }
-
- if resp.StatusCode == http.StatusNotFound {
- // Suppress returning this result, because we want to
- // search the federation.
- return nil, nil
- }
- return resp, nil
-}
-
-type searchRemoteClusterForPDH struct {
- pdh string
- remoteID string
- mtx *sync.Mutex
- sentResponse *bool
- sharedContext *context.Context
- cancelFunc func()
- errors *[]string
- statusCode *int
-}
-
-func fetchRemoteCollectionByUUID(
- h *genericFederatedRequestHandler,
- effectiveMethod string,
- clusterID *string,
- uuid string,
- remainder string,
- w http.ResponseWriter,
- req *http.Request) bool {
-
- if effectiveMethod != "GET" {
- // Only handle GET requests right now
- return false
- }
-
- if uuid != "" {
- // Collection UUID GET request
- *clusterID = uuid[0:5]
- if *clusterID != "" && *clusterID != h.handler.Cluster.ClusterID {
- // request for remote collection by uuid
- resp, err := h.handler.remoteClusterRequest(*clusterID, req)
- newResponse, err := rewriteSignatures(*clusterID, "", resp, err)
- h.handler.proxy.ForwardResponse(w, newResponse, err)
- return true
- }
- }
-
- return false
-}
-
-func fetchRemoteCollectionByPDH(
- h *genericFederatedRequestHandler,
- effectiveMethod string,
- clusterID *string,
- uuid string,
- remainder string,
- w http.ResponseWriter,
- req *http.Request) bool {
-
- if effectiveMethod != "GET" {
- // Only handle GET requests right now
- return false
- }
-
- m := collectionsByPDHRe.FindStringSubmatch(req.URL.Path)
- if len(m) != 2 {
- return false
- }
-
- // Request for collection by PDH. Search the federation.
-
- // First, query the local cluster.
- resp, err := h.handler.localClusterRequest(req)
- newResp, err := filterLocalClusterResponse(resp, err)
- if newResp != nil || err != nil {
- h.handler.proxy.ForwardResponse(w, newResp, err)
- return true
- }
-
- // Create a goroutine for each cluster in the
- // RemoteClusters map. The first valid result gets
- // returned to the client. When that happens, all
- // other outstanding requests are cancelled
- sharedContext, cancelFunc := context.WithCancel(req.Context())
- defer cancelFunc()
-
- req = req.WithContext(sharedContext)
- wg := sync.WaitGroup{}
- pdh := m[1]
- success := make(chan *http.Response)
- errorChan := make(chan error, len(h.handler.Cluster.RemoteClusters))
-
- acquire, release := semaphore(h.handler.Cluster.API.MaxRequestAmplification)
-
- for remoteID := range h.handler.Cluster.RemoteClusters {
- if remoteID == h.handler.Cluster.ClusterID {
- // No need to query local cluster again
- continue
- }
- if remoteID == "*" {
- // This isn't a real remote cluster: it just sets defaults for unlisted remotes.
- continue
- }
-
- wg.Add(1)
- go func(remote string) {
- defer wg.Done()
- acquire()
- defer release()
- select {
- case <-sharedContext.Done():
- return
- default:
- }
-
- resp, err := h.handler.remoteClusterRequest(remote, req)
- wasSuccess := false
- defer func() {
- if resp != nil && !wasSuccess {
- resp.Body.Close()
- }
- }()
- if err != nil {
- errorChan <- err
- return
- }
- if resp.StatusCode != http.StatusOK {
- errorChan <- HTTPError{resp.Status, resp.StatusCode}
- return
- }
- select {
- case <-sharedContext.Done():
- return
- default:
- }
-
- newResponse, err := rewriteSignatures(remote, pdh, resp, nil)
- if err != nil {
- errorChan <- err
- return
- }
- select {
- case <-sharedContext.Done():
- case success <- newResponse:
- wasSuccess = true
- }
- }(remoteID)
- }
- go func() {
- wg.Wait()
- cancelFunc()
- }()
-
- errorCode := http.StatusNotFound
-
- for {
- select {
- case newResp = <-success:
- h.handler.proxy.ForwardResponse(w, newResp, nil)
- return true
- case <-sharedContext.Done():
- var errors []string
- for len(errorChan) > 0 {
- err := <-errorChan
- if httperr, ok := err.(HTTPError); !ok || httperr.Code != http.StatusNotFound {
- errorCode = http.StatusBadGateway
- }
- errors = append(errors, err.Error())
- }
- httpserver.Errors(w, errors, errorCode)
- return true
- }
- }
-
- // shouldn't ever get here
- return true
-}
+++ /dev/null
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package controller
-
-import (
- "bytes"
- "encoding/json"
- "fmt"
- "io/ioutil"
- "net/http"
- "strings"
-
- "git.arvados.org/arvados.git/sdk/go/auth"
- "git.arvados.org/arvados.git/sdk/go/httpserver"
-)
-
-func remoteContainerRequestCreate(
- h *genericFederatedRequestHandler,
- effectiveMethod string,
- clusterID *string,
- uuid string,
- remainder string,
- w http.ResponseWriter,
- req *http.Request) bool {
-
- if effectiveMethod != "POST" || uuid != "" || remainder != "" {
- return false
- }
-
- // First make sure supplied token is valid.
- creds := auth.NewCredentials()
- creds.LoadTokensFromHTTPRequest(req)
-
- currentUser, ok, err := h.handler.validateAPItoken(req, creds.Tokens[0])
- if err != nil {
- httpserver.Error(w, err.Error(), http.StatusInternalServerError)
- return true
- } else if !ok {
- httpserver.Error(w, "invalid API token", http.StatusForbidden)
- return true
- }
-
- if *clusterID == "" || *clusterID == h.handler.Cluster.ClusterID {
- // Submitting container request to local cluster. No
- // need to set a runtime_token (rails api will create
- // one when the container runs) or do a remote cluster
- // request.
- return false
- }
-
- if req.Header.Get("Content-Type") != "application/json" {
- httpserver.Error(w, "Expected Content-Type: application/json, got "+req.Header.Get("Content-Type"), http.StatusBadRequest)
- return true
- }
-
- originalBody := req.Body
- defer originalBody.Close()
- var request map[string]interface{}
- err = json.NewDecoder(req.Body).Decode(&request)
- if err != nil {
- httpserver.Error(w, err.Error(), http.StatusBadRequest)
- return true
- }
-
- crString, ok := request["container_request"].(string)
- if ok {
- var crJSON map[string]interface{}
- err := json.Unmarshal([]byte(crString), &crJSON)
- if err != nil {
- httpserver.Error(w, err.Error(), http.StatusBadRequest)
- return true
- }
-
- request["container_request"] = crJSON
- }
-
- containerRequest, ok := request["container_request"].(map[string]interface{})
- if !ok {
- // Use toplevel object as the container_request object
- containerRequest = request
- }
-
- // If runtime_token is not set, create a new token
- if _, ok := containerRequest["runtime_token"]; !ok {
- if len(currentUser.Authorization.Scopes) != 1 || currentUser.Authorization.Scopes[0] != "all" {
- httpserver.Error(w, "Token scope is not [all]", http.StatusForbidden)
- return true
- }
-
- if strings.HasPrefix(currentUser.Authorization.UUID, h.handler.Cluster.ClusterID) {
- // Local user, submitting to a remote cluster.
- // Create a new time-limited token.
- newtok, err := h.handler.createAPItoken(req, currentUser.UUID, nil)
- if err != nil {
- httpserver.Error(w, err.Error(), http.StatusForbidden)
- return true
- }
- containerRequest["runtime_token"] = newtok.TokenV2()
- } else {
- // Remote user. Container request will use the
- // current token, minus the trailing portion
- // (optional container uuid).
- sp := strings.Split(creds.Tokens[0], "/")
- if len(sp) >= 3 {
- containerRequest["runtime_token"] = strings.Join(sp[0:3], "/")
- } else {
- containerRequest["runtime_token"] = creds.Tokens[0]
- }
- }
- }
-
- newbody, err := json.Marshal(request)
- buf := bytes.NewBuffer(newbody)
- req.Body = ioutil.NopCloser(buf)
- req.ContentLength = int64(buf.Len())
- req.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len()))
-
- resp, err := h.handler.remoteClusterRequest(*clusterID, req)
- h.handler.proxy.ForwardResponse(w, resp, err)
- return true
-}
wfHandler := &genericFederatedRequestHandler{next, h, wfRe, nil}
containersHandler := &genericFederatedRequestHandler{next, h, containersRe, nil}
- containerRequestsHandler := &genericFederatedRequestHandler{next, h, containerRequestsRe,
- []federatedRequestDelegate{remoteContainerRequestCreate}}
- collectionsRequestsHandler := &genericFederatedRequestHandler{next, h, collectionsRe,
- []federatedRequestDelegate{fetchRemoteCollectionByUUID, fetchRemoteCollectionByPDH}}
linksRequestsHandler := &genericFederatedRequestHandler{next, h, linksRe, nil}
mux.Handle("/arvados/v1/workflows", wfHandler)
mux.Handle("/arvados/v1/workflows/", wfHandler)
mux.Handle("/arvados/v1/containers", containersHandler)
mux.Handle("/arvados/v1/containers/", containersHandler)
- mux.Handle("/arvados/v1/container_requests", containerRequestsHandler)
- mux.Handle("/arvados/v1/container_requests/", containerRequestsHandler)
- mux.Handle("/arvados/v1/collections", collectionsRequestsHandler)
- mux.Handle("/arvados/v1/collections/", collectionsRequestsHandler)
mux.Handle("/arvados/v1/links", linksRequestsHandler)
mux.Handle("/arvados/v1/links/", linksRequestsHandler)
mux.Handle("/", next)
cluster.API.MaxItemsPerResponse = 1000
cluster.API.MaxRequestAmplification = 4
cluster.API.RequestTimeout = arvados.Duration(5 * time.Minute)
+ cluster.Collections.BlobSigning = true
+ cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey
+ cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour * 24 * 14)
arvadostest.SetServiceURL(&cluster.Services.RailsAPI, "http://localhost:1/")
arvadostest.SetServiceURL(&cluster.Services.Controller, "http://localhost:/")
s.testHandler = &Handler{Cluster: cluster}
s.testHandler.Cluster.ClusterID = "zzzzz"
s.testHandler.Cluster.SystemRootToken = arvadostest.SystemRootToken
s.testHandler.Cluster.API.MaxTokenLifetime = arvados.Duration(time.Hour)
- s.testHandler.Cluster.Collections.BlobSigningTTL = arvados.Duration(336 * time.Hour) // For some reason, this was set to 0h
resp := s.testRequest(req).Result()
c.Check(resp.StatusCode, check.Equals, http.StatusOK)
"path/filepath"
"strconv"
"strings"
+ "sync"
"git.arvados.org/arvados.git/lib/boot"
"git.arvados.org/arvados.git/lib/config"
}
}
+func (s *IntegrationSuite) TestDefaultStorageClassesOnCollections(c *check.C) {
+ conn := s.testClusters["z1111"].Conn()
+ rootctx, _, _ := s.testClusters["z1111"].RootClients()
+ userctx, _, kc, _ := s.testClusters["z1111"].UserClients(rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+ c.Assert(len(kc.DefaultStorageClasses) > 0, check.Equals, true)
+ coll, err := conn.CollectionCreate(userctx, arvados.CreateOptions{})
+ c.Assert(err, check.IsNil)
+ c.Assert(coll.StorageClassesDesired, check.DeepEquals, kc.DefaultStorageClasses)
+}
+
func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
conn1 := s.testClusters["z1111"].Conn()
rootctx1, _, _ := s.testClusters["z1111"].RootClients()
c.Check(coll.PortableDataHash, check.Equals, pdh)
}
+// Tests bug #18004
+func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) {
+ conn1 := s.testClusters["z1111"].Conn()
+ rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+ rootctx2, _, _ := s.testClusters["z2222"].RootClients()
+ conn2 := s.testClusters["z2222"].Conn()
+ userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2@example.com", true)
+
+ var wg1, wg2 sync.WaitGroup
+ creqs := 100
+
+ // Make concurrent requests to z2222 with a local token to make sure more
+ // than one worker is listening.
+ wg1.Add(1)
+ for i := 0; i < creqs; i++ {
+ wg2.Add(1)
+ go func() {
+ defer wg2.Done()
+ wg1.Wait()
+ _, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{})
+ c.Check(err, check.IsNil, check.Commentf("warm up phase failed"))
+ }()
+ }
+ wg1.Done()
+ wg2.Wait()
+
+	// Real test pass -- use a different remote token from the one used in the
+	// warm-up phase.
+ wg1.Add(1)
+ for i := 0; i < creqs; i++ {
+ wg2.Add(1)
+ go func() {
+ defer wg2.Done()
+ wg1.Wait()
+			// Request the current user record from cluster z2222 using the remote token.
+ _, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{})
+ c.Check(err, check.IsNil, check.Commentf("testing phase failed"))
+ }()
+ }
+ wg1.Done()
+ wg2.Wait()
+}
+
func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) {
if _, err := exec.LookPath("s3cmd"); err != nil {
c.Skip("s3cmd not in PATH")
}
// We test the direct access to the database
-// normally an integration test would not have a database access, but in this case we need
+// normally an integration test would not have a database access, but in this case we need
// to test tokens that are secret, so there is no API response that will give them back
func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) {
ctx := context.Background()
}
}
+// Test for bug #18076
+func (s *IntegrationSuite) TestStaleCachedUserRecord(c *check.C) {
+ rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+ conn1 := s.testClusters["z1111"].Conn()
+ conn3 := s.testClusters["z3333"].Conn()
+
+ // Make sure LoginCluster is properly configured
+ for cls := range s.testClusters {
+ if cls == "z1111" || cls == "z3333" {
+ c.Check(
+ s.testClusters[cls].Config.Clusters[cls].Login.LoginCluster,
+ check.Equals, "z1111",
+ check.Commentf("incorrect LoginCluster config on cluster %q", cls))
+ }
+ }
+
+ // Create some users, request them on the federated cluster so they're cached.
+ var users []arvados.User
+ for userNr := 0; userNr < 2; userNr++ {
+ _, _, _, user := s.testClusters["z1111"].UserClients(
+ rootctx1,
+ c,
+ conn1,
+ fmt.Sprintf("user%d@example.com", userNr),
+ true)
+ c.Assert(user.Username, check.Not(check.Equals), "")
+ users = append(users, user)
+
+ lst, err := conn3.UserList(rootctx1, arvados.ListOptions{Limit: -1})
+ c.Assert(err, check.Equals, nil)
+ userFound := false
+ for _, fedUser := range lst.Items {
+ if fedUser.UUID == user.UUID {
+ c.Assert(fedUser.Username, check.Equals, user.Username)
+ userFound = true
+ break
+ }
+ }
+ c.Assert(userFound, check.Equals, true)
+ }
+
+ // Swap the usernames
+ _, err := conn1.UserUpdate(rootctx1, arvados.UpdateOptions{
+ UUID: users[0].UUID,
+ Attrs: map[string]interface{}{
+ "username": "",
+ },
+ })
+ c.Assert(err, check.Equals, nil)
+ _, err = conn1.UserUpdate(rootctx1, arvados.UpdateOptions{
+ UUID: users[1].UUID,
+ Attrs: map[string]interface{}{
+ "username": users[0].Username,
+ },
+ })
+ c.Assert(err, check.Equals, nil)
+ _, err = conn1.UserUpdate(rootctx1, arvados.UpdateOptions{
+ UUID: users[0].UUID,
+ Attrs: map[string]interface{}{
+ "username": users[1].Username,
+ },
+ })
+ c.Assert(err, check.Equals, nil)
+
+ // Re-request the list on the federated cluster & check for updates
+ lst, err := conn3.UserList(rootctx1, arvados.ListOptions{Limit: -1})
+ c.Assert(err, check.Equals, nil)
+ var user0Found, user1Found bool
+ for _, user := range lst.Items {
+ if user.UUID == users[0].UUID {
+ user0Found = true
+ c.Assert(user.Username, check.Equals, users[1].Username)
+ } else if user.UUID == users[1].UUID {
+ user1Found = true
+ c.Assert(user.Username, check.Equals, users[0].Username)
+ }
+ }
+ c.Assert(user0Found, check.Equals, true)
+ c.Assert(user1Found, check.Equals, true)
+}
+
// Test for bug #16263
func (s *IntegrationSuite) TestListUsers(c *check.C) {
rootctx1, _, _ := s.testClusters["z1111"].RootClients()
for _, user := range lst.Items {
if user.Username == "" {
nullUsername = true
+ break
}
}
c.Assert(nullUsername, check.Equals, true)
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+ "context"
+ "time"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/auth"
+)
+
+// CollectionGet defers to railsProxy for everything except blob
+// signatures.
+func (conn *Conn) CollectionGet(ctx context.Context, opts arvados.GetOptions) (arvados.Collection, error) {
+	if len(opts.Select) > 0 {
+		// We need to know IsTrashed and TrashAt to implement
+		// signing properly, even if the caller doesn't want
+		// them.
+		// NOTE(review): the extra fields are not stripped from
+		// the response afterwards, so a caller using Select may
+		// receive is_trashed/trash_at it did not ask for —
+		// confirm this is acceptable.
+		opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
+	}
+	resp, err := conn.railsProxy.CollectionGet(ctx, opts)
+	if err != nil {
+		return resp, err
+	}
+	// Add blob signatures (no-op if trashed, manifest empty, or
+	// signing is disabled; see signCollection).
+	conn.signCollection(ctx, &resp)
+	return resp, nil
+}
+
+// CollectionList defers to railsProxy for everything except blob
+// signatures, which are added to every item in the returned page.
+func (conn *Conn) CollectionList(ctx context.Context, opts arvados.ListOptions) (arvados.CollectionList, error) {
+	if len(opts.Select) > 0 {
+		// We need to know IsTrashed and TrashAt to implement
+		// signing properly, even if the caller doesn't want
+		// them.
+		opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
+	}
+	resp, err := conn.railsProxy.CollectionList(ctx, opts)
+	if err != nil {
+		return resp, err
+	}
+	// Sign each item in place.
+	for i := range resp.Items {
+		conn.signCollection(ctx, &resp.Items[i])
+	}
+	return resp, nil
+}
+
+// CollectionCreate defers to railsProxy for everything except blob
+// signatures, which are added to the returned collection.
+func (conn *Conn) CollectionCreate(ctx context.Context, opts arvados.CreateOptions) (arvados.Collection, error) {
+	if len(opts.Select) > 0 {
+		// We need to know IsTrashed and TrashAt to implement
+		// signing properly, even if the caller doesn't want
+		// them.
+		opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
+	}
+	resp, err := conn.railsProxy.CollectionCreate(ctx, opts)
+	if err != nil {
+		return resp, err
+	}
+	conn.signCollection(ctx, &resp)
+	return resp, nil
+}
+
+// CollectionUpdate defers to railsProxy for everything except blob
+// signatures, which are added to the returned collection.
+func (conn *Conn) CollectionUpdate(ctx context.Context, opts arvados.UpdateOptions) (arvados.Collection, error) {
+	if len(opts.Select) > 0 {
+		// We need to know IsTrashed and TrashAt to implement
+		// signing properly, even if the caller doesn't want
+		// them.
+		opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
+	}
+	resp, err := conn.railsProxy.CollectionUpdate(ctx, opts)
+	if err != nil {
+		return resp, err
+	}
+	conn.signCollection(ctx, &resp)
+	return resp, nil
+}
+
+// signCollection adds blob signatures to coll.ManifestText in place,
+// signing with the first token found in the request context.
+//
+// It is a no-op when the collection is trashed, the manifest is empty,
+// blob signing is disabled clusterwide, or no token is available.
+func (conn *Conn) signCollection(ctx context.Context, coll *arvados.Collection) {
+	if coll.IsTrashed || coll.ManifestText == "" || !conn.cluster.Collections.BlobSigning {
+		return
+	}
+	var token string
+	if creds, ok := auth.FromContext(ctx); ok && len(creds.Tokens) > 0 {
+		token = creds.Tokens[0]
+	}
+	if token == "" {
+		return
+	}
+	ttl := conn.cluster.Collections.BlobSigningTTL.Duration()
+	exp := time.Now().Add(ttl)
+	// Don't issue signatures that outlive the collection's trash
+	// time: cap the expiry at TrashAt when it comes sooner.
+	if coll.TrashAt != nil && !coll.TrashAt.IsZero() && coll.TrashAt.Before(exp) {
+		exp = *coll.TrashAt
+	}
+	coll.ManifestText = arvados.SignManifest(coll.ManifestText, token, exp, ttl, []byte(conn.cluster.Collections.BlobSigningKey))
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+ "context"
+ "regexp"
+ "strconv"
+ "time"
+
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/auth"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CollectionSuite{})
+
+// CollectionSuite exercises the localdb collection wrappers' blob
+// signing behavior against a real RailsAPI (via a spy proxy).
+type CollectionSuite struct {
+	cluster  *arvados.Cluster // loaded from test config in SetUpTest
+	localdb  *Conn            // conn under test
+	railsSpy *arvadostest.Proxy
+}
+
+func (s *CollectionSuite) TearDownSuite(c *check.C) {
+	// Undo any changes/additions to the user database so they
+	// don't affect subsequent tests.
+	arvadostest.ResetEnv()
+	// Reset the API server's database to its fixture state.
+	c.Check(arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil), check.IsNil)
+}
+
+func (s *CollectionSuite) SetUpTest(c *check.C) {
+	// Load the test cluster config.
+	cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+	c.Assert(err, check.IsNil)
+	s.cluster, err = cfg.GetCluster("")
+	c.Assert(err, check.IsNil)
+	s.localdb = NewConn(s.cluster)
+	// Route the localdb conn's RailsAPI traffic through a spy
+	// proxy so tests observe real server responses.
+	s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+	*s.localdb.railsProxy = *rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider)
+}
+
+func (s *CollectionSuite) TearDownTest(c *check.C) {
+	// Shut down the per-test spy proxy started in SetUpTest.
+	s.railsSpy.Close()
+}
+
+// TestSignatures checks that manifests returned by get/list/create/
+// update carry blob signatures with the expected TTL, and that the
+// unsigned_manifest_text field stays unsigned.
+func (s *CollectionSuite) TestSignatures(c *check.C) {
+	ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
+
+	// Default get: manifest is signed (+A...@... hint) with the
+	// full configured TTL (2 weeks in the test config).
+	resp, err := s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: arvadostest.FooCollection})
+	c.Check(err, check.IsNil)
+	c.Check(resp.ManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3\+A[0-9a-f]+@[0-9a-f]+ 0:.*`)
+	s.checkSignatureExpiry(c, resp.ManifestText, time.Hour*24*7*2)
+
+	// Explicitly selecting manifest_text still yields a signed manifest.
+	resp, err = s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: arvadostest.FooCollection, Select: []string{"manifest_text"}})
+	c.Check(err, check.IsNil)
+	c.Check(resp.ManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3\+A[0-9a-f]+@[0-9a-f]+ 0:.*`)
+
+	// List without selecting manifest fields: both manifest fields empty.
+	lresp, err := s.localdb.CollectionList(ctx, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}})
+	c.Check(err, check.IsNil)
+	if c.Check(lresp.Items, check.HasLen, 1) {
+		c.Check(lresp.Items[0].UUID, check.Equals, arvadostest.FooCollection)
+		c.Check(lresp.Items[0].ManifestText, check.Equals, "")
+		c.Check(lresp.Items[0].UnsignedManifestText, check.Equals, "")
+	}
+
+	// List selecting manifest_text: signed manifest only.
+	lresp, err = s.localdb.CollectionList(ctx, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}, Select: []string{"manifest_text"}})
+	c.Check(err, check.IsNil)
+	if c.Check(lresp.Items, check.HasLen, 1) {
+		c.Check(lresp.Items[0].ManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3\+A[0-9a-f]+@[0-9a-f]+ 0:.*`)
+		c.Check(lresp.Items[0].UnsignedManifestText, check.Equals, "")
+	}
+
+	// List selecting unsigned_manifest_text: no signature hints added.
+	lresp, err = s.localdb.CollectionList(ctx, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}}, Select: []string{"unsigned_manifest_text"}})
+	c.Check(err, check.IsNil)
+	if c.Check(lresp.Items, check.HasLen, 1) {
+		c.Check(lresp.Items[0].ManifestText, check.Equals, "")
+		c.Check(lresp.Items[0].UnsignedManifestText, check.Matches, `(?ms).* acbd[^ ]*\+3 0:.*`)
+	}
+
+	// early trash date causes lower signature TTL (even if
+	// trash_at and is_trashed fields are unselected)
+	trashed, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{
+		Select: []string{"uuid", "manifest_text"},
+		Attrs: map[string]interface{}{
+			"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+			"trash_at":      time.Now().UTC().Add(time.Hour),
+		}})
+	c.Assert(err, check.IsNil)
+	s.checkSignatureExpiry(c, trashed.ManifestText, time.Hour)
+	resp, err = s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: trashed.UUID})
+	c.Assert(err, check.IsNil)
+	s.checkSignatureExpiry(c, resp.ManifestText, time.Hour)
+
+	// distant future trash date does not cause higher signature TTL
+	trashed, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+		UUID: trashed.UUID,
+		Attrs: map[string]interface{}{
+			"trash_at": time.Now().UTC().Add(time.Hour * 24 * 365),
+		}})
+	c.Assert(err, check.IsNil)
+	s.checkSignatureExpiry(c, trashed.ManifestText, time.Hour*24*7*2)
+	resp, err = s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: trashed.UUID})
+	c.Assert(err, check.IsNil)
+	s.checkSignatureExpiry(c, resp.ManifestText, time.Hour*24*7*2)
+
+	// Make sure groups/contents doesn't return manifest_text with
+	// collections (if it did, we'd need to sign it).
+	gresp, err := s.localdb.GroupContents(ctx, arvados.GroupContentsOptions{
+		Limit:   -1,
+		Filters: []arvados.Filter{{"uuid", "=", arvadostest.FooCollection}},
+		Select:  []string{"uuid", "manifest_text"},
+	})
+	c.Check(err, check.IsNil)
+	if c.Check(gresp.Items, check.HasLen, 1) {
+		c.Check(gresp.Items[0].(map[string]interface{})["uuid"], check.Equals, arvadostest.FooCollection)
+		c.Check(gresp.Items[0].(map[string]interface{})["manifest_text"], check.Equals, nil)
+	}
+}
+
+// checkSignatureExpiry parses the first blob signature's hex expiry
+// timestamp ("@deadbeef") out of manifestText and checks that it falls
+// within the final 60 seconds of the expected TTL window.
+func (s *CollectionSuite) checkSignatureExpiry(c *check.C, manifestText string, expectedTTL time.Duration) {
+	m := regexp.MustCompile(`@([[:xdigit:]]+)`).FindStringSubmatch(manifestText)
+	c.Assert(m, check.HasLen, 2)
+	sigexp, err := strconv.ParseInt(m[1], 16, 64)
+	c.Assert(err, check.IsNil)
+	expectedExp := time.Now().Add(expectedTTL).Unix()
+	// 60s slack allows for elapsed time between signing and checking.
+	c.Check(sigexp > expectedExp-60, check.Equals, true)
+	c.Check(sigexp <= expectedExp, check.Equals, true)
+}
+
+// TestSignaturesDisabled checks that with BlobSigning turned off,
+// returned manifests contain bare (unsigned) block locators.
+func (s *CollectionSuite) TestSignaturesDisabled(c *check.C) {
+	s.localdb.cluster.Collections.BlobSigning = false
+	ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
+
+	resp, err := s.localdb.CollectionGet(ctx, arvados.GetOptions{UUID: arvadostest.FooCollection})
+	c.Check(err, check.IsNil)
+	c.Check(resp.ManifestText, check.Matches, `(?ms).* acbd[^ +]*\+3 0:.*`)
+}
"net/http"
"os"
"path/filepath"
+ "time"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
PostgreSQL: integrationTestCluster().PostgreSQL,
}}
handler.Cluster.TLS.Insecure = true
+ handler.Cluster.Collections.BlobSigning = true
+ handler.Cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey
+ handler.Cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour * 24 * 14)
arvadostest.SetServiceURL(&handler.Cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
arvadostest.SetServiceURL(&handler.Cluster.Services.Controller, "http://localhost:/")
}
func (s *Suite) SetUpSuite(c *check.C) {
- arvadostest.StartAPI()
arvadostest.StartKeep(2, true)
// Get the various arvados, arvadosclient, and keep client objects
if len(cl.Items) == 1 {
imageCollection = cl.Items[0]
} else {
- collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339)
+ collectionName := "converting " + collectionName
exp := time.Now().Add(24 * 7 * 2 * time.Hour)
err = containerClient.RequestAndDecode(&imageCollection,
arvados.EndpointCollectionCreate.Method,
"name": collectionName,
"trash_at": exp.UTC().Format(time.RFC3339),
},
+ "ensure_unique_name": true,
})
if err != nil {
return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err)
}
if _, err := os.Stat(imageFilename); os.IsNotExist(err) {
+ // Make sure the docker image is readable, and error
+ // out if not.
+ if _, err := os.Stat(imageTarballPath); err != nil {
+ return err
+ }
+
e.logf("building singularity image")
// "singularity build" does not accept a
// docker-archive://... filename containing a ":" character,
errorRate float64
}
-func (stub lsfstub) stubCommand(c *check.C) func(prog string, args ...string) *exec.Cmd {
+func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...string) *exec.Cmd {
mtx := sync.Mutex{}
nextjobid := 100
fakejobq := map[int]string{}
}
switch prog {
case "bsub":
- c.Assert(args, check.HasLen, 4)
+ defaultArgs := s.disp.Cluster.Containers.LSF.BsubArgumentsList
+ c.Assert(args, check.HasLen, 4+len(defaultArgs))
+ c.Check(args[:len(defaultArgs)], check.DeepEquals, defaultArgs)
+ args = args[len(defaultArgs):]
+
c.Check(args[0], check.Equals, "-J")
switch args[1] {
case arvadostest.LockedContainerUUID:
s.disp.lsfcli.stubCommand = lsfstub{
errorRate: 0.1,
sudoUser: s.disp.Cluster.Containers.LSF.BsubSudoUser,
- }.stubCommand(c)
+ }.stubCommand(s, c)
s.disp.Start()
deadline := time.Now().Add(20 * time.Second)
for range time.NewTicker(time.Second).C {
type Suite struct{}
func (*Suite) SetUpSuite(c *check.C) {
- arvadostest.StartAPI()
arvadostest.StartKeep(2, true)
}
type UpdateOptions struct {
UUID string `json:"uuid"`
Attrs map[string]interface{} `json:"attrs"`
+ Select []string `json:"select"`
BypassFederation bool `json:"bypass_federation"`
}
data, err := ioutil.ReadFile(file)
if err != nil {
if !os.IsNotExist(err) {
- log.Printf("error reading %q: %s", file, err)
+ log.Printf("proceeding without loading cert file %q: %s", file, err)
}
continue
}
return value, ErrInvalidArgument
}
+// ClusterConfig returns the value of the given key in the current cluster's
+// exported config. If key is an empty string, it'll return the entire config.
+//
+// Returns ErrInvalidArgument when the key is not present in the
+// exported config.
+func (c *ArvadosClient) ClusterConfig(key string) (config interface{}, err error) {
+	var clusterConfig interface{}
+	// GET the discovery/config endpoint; no authentication arguments needed.
+	err = c.Call("GET", "config", "", "", nil, &clusterConfig)
+	if err != nil {
+		return nil, err
+	}
+	if key == "" {
+		return clusterConfig, nil
+	}
+	configData, ok := clusterConfig.(map[string]interface{})[key]
+	if !ok {
+		return nil, ErrInvalidArgument
+	}
+	return configData, nil
+}
+
func (c *ArvadosClient) httpClient() *http.Client {
if c.Client != nil {
return c.Client
c.Assert(value, IsNil)
}
+// The exported StorageClasses section should include a "default"
+// class on the test cluster.
+func (s *ServerRequiredSuite) TestAPIClusterConfig_Get_StorageClasses(c *C) {
+	arv, err := MakeArvadosClient()
+	c.Assert(err, IsNil)
+	data, err := arv.ClusterConfig("StorageClasses")
+	c.Assert(err, IsNil)
+	c.Assert(data, NotNil)
+	clusterConfig := data.(map[string]interface{})
+	_, ok := clusterConfig["default"]
+	c.Assert(ok, Equals, true)
+}
+
+// An empty key should return the whole exported config, including
+// the StorageClasses section.
+func (s *ServerRequiredSuite) TestAPIClusterConfig_Get_All(c *C) {
+	arv, err := MakeArvadosClient()
+	c.Assert(err, IsNil)
+	data, err := arv.ClusterConfig("")
+	c.Assert(err, IsNil)
+	c.Assert(data, NotNil)
+	clusterConfig := data.(map[string]interface{})
+	_, ok := clusterConfig["StorageClasses"]
+	c.Assert(ok, Equals, true)
+}
+
+// Asking for a nonexistent config section should return an error and
+// no data.
+func (s *ServerRequiredSuite) TestAPIClusterConfig_Get_noSuchSection(c *C) {
+	arv, err := MakeArvadosClient()
+	c.Assert(err, IsNil)
+	data, err := arv.ClusterConfig("noSuchSection")
+	c.Assert(err, NotNil)
+	c.Assert(data, IsNil)
+}
+
func (s *ServerRequiredSuite) TestCreateLarge(c *C) {
arv, err := MakeArvadosClient()
c.Assert(err, IsNil)
package arvadostest
import (
- "bufio"
- "bytes"
+ "crypto/tls"
"fmt"
"io/ioutil"
"log"
+ "net/http"
"os"
"os/exec"
"path"
"strconv"
"strings"
+
+ "gopkg.in/check.v1"
)
var authSettings = make(map[string]string)
-// ResetEnv resets test env
+// ResetEnv resets ARVADOS_* env vars to whatever they were the first
+// time this func was called.
+//
+// Call it from your SetUpTest or SetUpSuite func if your tests modify
+// env vars.
func ResetEnv() {
- for k, v := range authSettings {
- os.Setenv(k, v)
- }
-}
-
-// APIHost returns the address:port of the current test server.
-func APIHost() string {
- h := authSettings["ARVADOS_API_HOST"]
- if h == "" {
- log.Fatal("arvadostest.APIHost() was called but authSettings is not populated")
- }
- return h
-}
-
-// ParseAuthSettings parses auth settings from given input
-func ParseAuthSettings(authScript []byte) {
- scanner := bufio.NewScanner(bytes.NewReader(authScript))
- for scanner.Scan() {
- line := scanner.Text()
- if 0 != strings.Index(line, "export ") {
- log.Printf("Ignoring: %v", line)
- continue
+ if len(authSettings) == 0 {
+ for _, e := range os.Environ() {
+ e := strings.SplitN(e, "=", 2)
+ if len(e) == 2 {
+ authSettings[e[0]] = e[1]
+ }
}
- toks := strings.SplitN(strings.Replace(line, "export ", "", 1), "=", 2)
- if len(toks) == 2 {
- authSettings[toks[0]] = toks[1]
- } else {
- log.Fatalf("Could not parse: %v", line)
+ } else {
+ for k, v := range authSettings {
+ os.Setenv(k, v)
}
}
- log.Printf("authSettings: %v", authSettings)
}
var pythonTestDir string
}
}
-// StartAPI starts test API server
-func StartAPI() {
- cwd, _ := os.Getwd()
- defer os.Chdir(cwd)
- chdirToPythonTests()
-
- cmd := exec.Command("python", "run_test_server.py", "start", "--auth", "admin")
- cmd.Stdin = nil
- cmd.Stderr = os.Stderr
-
- authScript, err := cmd.Output()
- if err != nil {
- log.Fatalf("%+v: %s", cmd.Args, err)
- }
- ParseAuthSettings(authScript)
- ResetEnv()
-}
-
-// StopAPI stops test API server
-func StopAPI() {
- cwd, _ := os.Getwd()
- defer os.Chdir(cwd)
- chdirToPythonTests()
-
- cmd := exec.Command("python", "run_test_server.py", "stop")
- bgRun(cmd)
- // Without Wait, "go test" in go1.10.1 tends to hang. https://github.com/golang/go/issues/24050
- cmd.Wait()
+// ResetDB resets the test API server's database to its fixture state
+// by POSTing to /database/reset as the admin user, skipping TLS
+// certificate verification (the test server uses a self-signed cert).
+func ResetDB(c *check.C) {
+	hc := http.Client{Transport: &http.Transport{
+		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
+	}}
+	req, err := http.NewRequest("POST", "https://"+os.Getenv("ARVADOS_TEST_API_HOST")+"/database/reset", nil)
+	c.Assert(err, check.IsNil)
+	req.Header.Set("Authorization", "Bearer "+AdminToken)
+	resp, err := hc.Do(req)
+	c.Assert(err, check.IsNil)
+	defer resp.Body.Close()
+	c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+}
// StartKeep starts the given number of keep servers,
type suite struct{}
-func (s *suite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
-}
-
-func (s *suite) TearDownSuite(c *C) {
- arvadostest.StopAPI()
-}
-
func (s *suite) TestTrackContainer(c *C) {
arv, err := arvadosclient.MakeArvadosClient()
c.Assert(err, Equals, nil)
// KeepClient holds information about Arvados and Keep servers.
type KeepClient struct {
- Arvados *arvadosclient.ArvadosClient
- Want_replicas int
- localRoots map[string]string
- writableLocalRoots map[string]string
- gatewayRoots map[string]string
- lock sync.RWMutex
- HTTPClient HTTPClient
- Retries int
- BlockCache *BlockCache
- RequestID string
- StorageClasses []string
+ Arvados *arvadosclient.ArvadosClient
+ Want_replicas int
+ localRoots map[string]string
+ writableLocalRoots map[string]string
+ gatewayRoots map[string]string
+ lock sync.RWMutex
+ HTTPClient HTTPClient
+ Retries int
+ BlockCache *BlockCache
+ RequestID string
+ StorageClasses []string
+ DefaultStorageClasses []string // Set by cluster's exported config
// set to 1 if all writable services are of disk type, otherwise 0
replicasPerService int
disableDiscovery bool
}
-// MakeKeepClient creates a new KeepClient, calls
+// loadDefaultClasses populates kc.DefaultStorageClasses with the names
+// of all storage classes marked Default:true in the cluster's exported
+// StorageClasses config section.
+func (kc *KeepClient) loadDefaultClasses() error {
+	scData, err := kc.Arvados.ClusterConfig("StorageClasses")
+	if err != nil {
+		return err
+	}
+	// NOTE(review): unchecked type assertion — panics if the
+	// StorageClasses section is not a JSON object; confirm the
+	// exported config guarantees this shape.
+	classes := scData.(map[string]interface{})
+	for scName := range classes {
+		scConf, _ := classes[scName].(map[string]interface{})
+		isDefault, ok := scConf["Default"].(bool)
+		if ok && isDefault {
+			kc.DefaultStorageClasses = append(kc.DefaultStorageClasses, scName)
+		}
+	}
+	return nil
+}
+
+// MakeKeepClient creates a new KeepClient, loads default storage classes, calls
// DiscoverKeepServices(), and returns when the client is ready to
// use.
func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
defaultReplicationLevel = int(v)
}
}
- return &KeepClient{
+ kc := &KeepClient{
Arvados: arv,
Want_replicas: defaultReplicationLevel,
Retries: 2,
}
+ err = kc.loadDefaultClasses()
+ if err != nil {
+ DebugPrintf("DEBUG: Unable to load the default storage classes cluster config")
+ }
+ return kc
}
// PutHR puts a block given the block hash, a reader, and the number of bytes
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
. "gopkg.in/check.v1"
- check "gopkg.in/check.v1"
)
// Gocheck boilerplate
}
func (s *ServerRequiredSuite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
arvadostest.StartKeep(2, false)
}
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
arvadostest.StopKeep(2)
- arvadostest.StopAPI()
}
func (s *ServerRequiredSuite) SetUpTest(c *C) {
}
}
+func (s *ServerRequiredSuite) TestDefaultStorageClasses(c *C) {
+ arv, err := arvadosclient.MakeArvadosClient()
+ c.Assert(err, IsNil)
+
+ cc, err := arv.ClusterConfig("StorageClasses")
+ c.Assert(err, IsNil)
+ c.Assert(cc, NotNil)
+ c.Assert(cc.(map[string]interface{})["default"], NotNil)
+
+ kc := New(arv)
+ c.Assert(kc.DefaultStorageClasses, DeepEquals, []string{"default"})
+}
+
func (s *ServerRequiredSuite) TestDefaultReplications(c *C) {
arv, err := arvadosclient.MakeArvadosClient()
- c.Assert(err, Equals, nil)
+ c.Assert(err, IsNil)
kc, err := MakeKeepClient(arv)
c.Check(err, IsNil)
// bind to 0.0.0.0 or [::] which is not a valid address for Dial()
ks.listener, err = net.ListenTCP("tcp", &net.TCPAddr{IP: []byte{127, 0, 0, 1}, Port: 0})
if err != nil {
- panic(fmt.Sprintf("Could not listen on any port"))
+ panic("Could not listen on any port")
}
ks.url = fmt.Sprintf("http://%s", ks.listener.Addr().String())
go http.Serve(ks.listener, st)
}
}
-func (s *StandaloneSuite) TestPutWithStorageClasses(c *C) {
+func (s *StandaloneSuite) TestPutWithoutStorageClassesClusterSupport(c *C) {
nServers := 5
for _, trial := range []struct {
replicas int
clientClasses []string
- putClasses []string // putClasses takes precedence over clientClasses
+ putClasses []string
minRequests int
maxRequests int
success bool
}{
+ // Talking to an older cluster (no default storage classes exported
+ // config) and no other additional storage classes requirements.
+ {1, nil, nil, 1, 1, true},
+ {2, nil, nil, 2, 2, true},
+ {3, nil, nil, 3, 3, true},
+ {nServers*2 + 1, nil, nil, nServers, nServers, false},
+
{1, []string{"class1"}, nil, 1, 1, true},
- {2, []string{"class1"}, nil, 1, 2, true},
- {3, []string{"class1"}, nil, 2, 3, true},
+ {2, []string{"class1"}, nil, 2, 2, true},
+ {3, []string{"class1"}, nil, 3, 3, true},
{1, []string{"class1", "class2"}, nil, 1, 1, true},
- {3, nil, []string{"class1"}, 2, 3, true},
- {1, nil, []string{"class1", "class2"}, 1, 1, true},
- {1, []string{"class404"}, []string{"class1", "class2"}, 1, 1, true},
- {1, []string{"class1"}, []string{"class404", "class2"}, nServers, nServers, false},
{nServers*2 + 1, []string{"class1"}, nil, nServers, nServers, false},
- {1, []string{"class404"}, nil, nServers, nServers, false},
- {1, []string{"class1", "class404"}, nil, nServers, nServers, false},
- {1, nil, []string{"class1", "class404"}, nServers, nServers, false},
+
+ {1, nil, []string{"class1"}, 1, 1, true},
+ {2, nil, []string{"class1"}, 2, 2, true},
+ {3, nil, []string{"class1"}, 3, 3, true},
+ {1, nil, []string{"class1", "class2"}, 1, 1, true},
+ {nServers*2 + 1, nil, []string{"class1"}, nServers, nServers, false},
+ } {
+ c.Logf("%+v", trial)
+ st := &StubPutHandler{
+ c: c,
+ expectPath: "acbd18db4cc2f85cedef654fccc4a4d8",
+ expectAPIToken: "abc123",
+ expectBody: "foo",
+ expectStorageClass: "*",
+ returnStorageClasses: "", // Simulate old cluster without SC keep support
+ handled: make(chan string, 100),
+ }
+ ks := RunSomeFakeKeepServers(st, nServers)
+ arv, _ := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(arv)
+ kc.Want_replicas = trial.replicas
+ kc.StorageClasses = trial.clientClasses
+ kc.DefaultStorageClasses = nil // Simulate an old cluster without SC defaults
+ arv.ApiToken = "abc123"
+ localRoots := make(map[string]string)
+ writableLocalRoots := make(map[string]string)
+ for i, k := range ks {
+ localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+ writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+ defer k.listener.Close()
+ }
+ kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
+
+ _, err := kc.BlockWrite(context.Background(), arvados.BlockWriteOptions{
+ Data: []byte("foo"),
+ StorageClasses: trial.putClasses,
+ })
+ if trial.success {
+ c.Check(err, IsNil)
+ } else {
+ c.Check(err, NotNil)
+ }
+ c.Check(len(st.handled) >= trial.minRequests, Equals, true, Commentf("len(st.handled)==%d, trial.minRequests==%d", len(st.handled), trial.minRequests))
+ c.Check(len(st.handled) <= trial.maxRequests, Equals, true, Commentf("len(st.handled)==%d, trial.maxRequests==%d", len(st.handled), trial.maxRequests))
+ if trial.clientClasses == nil && trial.putClasses == nil {
+ c.Check(st.requests[0].Header.Get("X-Keep-Storage-Classes"), Equals, "")
+ }
+ }
+}
+
+func (s *StandaloneSuite) TestPutWithStorageClasses(c *C) {
+ nServers := 5
+ for _, trial := range []struct {
+ replicas int
+ defaultClasses []string
+ clientClasses []string // clientClasses takes precedence over defaultClasses
+ putClasses []string // putClasses takes precedence over clientClasses
+ minRequests int
+ maxRequests int
+ success bool
+ }{
+ {1, []string{"class1"}, nil, nil, 1, 1, true},
+ {2, []string{"class1"}, nil, nil, 1, 2, true},
+ {3, []string{"class1"}, nil, nil, 2, 3, true},
+ {1, []string{"class1", "class2"}, nil, nil, 1, 1, true},
+
+ // defaultClasses doesn't matter when any of the others is specified.
+ {1, []string{"class1"}, []string{"class1"}, nil, 1, 1, true},
+ {2, []string{"class1"}, []string{"class1"}, nil, 1, 2, true},
+ {3, []string{"class1"}, []string{"class1"}, nil, 2, 3, true},
+ {1, []string{"class1"}, []string{"class1", "class2"}, nil, 1, 1, true},
+ {3, []string{"class1"}, nil, []string{"class1"}, 2, 3, true},
+ {1, []string{"class1"}, nil, []string{"class1", "class2"}, 1, 1, true},
+ {1, []string{"class1"}, []string{"class404"}, []string{"class1", "class2"}, 1, 1, true},
+ {1, []string{"class1"}, []string{"class1"}, []string{"class404", "class2"}, nServers, nServers, false},
+ {nServers*2 + 1, []string{}, []string{"class1"}, nil, nServers, nServers, false},
+ {1, []string{"class1"}, []string{"class404"}, nil, nServers, nServers, false},
+ {1, []string{"class1"}, []string{"class1", "class404"}, nil, nServers, nServers, false},
+ {1, []string{"class1"}, nil, []string{"class1", "class404"}, nServers, nServers, false},
} {
c.Logf("%+v", trial)
st := &StubPutHandler{
kc, _ := MakeKeepClient(arv)
kc.Want_replicas = trial.replicas
kc.StorageClasses = trial.clientClasses
+ kc.DefaultStorageClasses = trial.defaultClasses
arv.ApiToken = "abc123"
localRoots := make(map[string]string)
writableLocalRoots := make(map[string]string)
StorageClasses: trial.putClasses,
})
if trial.success {
- c.Check(err, check.IsNil)
+ c.Check(err, IsNil)
} else {
- c.Check(err, check.NotNil)
+ c.Check(err, NotNil)
}
- c.Check(len(st.handled) >= trial.minRequests, check.Equals, true, check.Commentf("len(st.handled)==%d, trial.minRequests==%d", len(st.handled), trial.minRequests))
- c.Check(len(st.handled) <= trial.maxRequests, check.Equals, true, check.Commentf("len(st.handled)==%d, trial.maxRequests==%d", len(st.handled), trial.maxRequests))
- if !trial.success && trial.replicas == 1 && c.Check(len(st.requests) >= 2, check.Equals, true) {
+ c.Check(len(st.handled) >= trial.minRequests, Equals, true, Commentf("len(st.handled)==%d, trial.minRequests==%d", len(st.handled), trial.minRequests))
+ c.Check(len(st.handled) <= trial.maxRequests, Equals, true, Commentf("len(st.handled)==%d, trial.maxRequests==%d", len(st.handled), trial.maxRequests))
+ if !trial.success && trial.replicas == 1 && c.Check(len(st.requests) >= 2, Equals, true) {
// Max concurrency should be 1. First request
// should have succeeded for class1. Second
// request should only ask for class404.
- c.Check(st.requests[1].Header.Get("X-Keep-Storage-Classes"), check.Equals, "class404")
+ c.Check(st.requests[1].Header.Get("X-Keep-Storage-Classes"), Equals, "class404")
}
}
}
expectPath: hash,
expectAPIToken: "abc123",
expectBody: "foo",
- expectStorageClass: "",
+ expectStorageClass: "default",
returnStorageClasses: "",
handled: make(chan string, 5),
}
expectPath: hash,
expectAPIToken: "abc123",
expectBody: "foo",
- expectStorageClass: "",
+ expectStorageClass: "default",
returnStorageClasses: "",
handled: make(chan string, 5),
}
expectPath: hash,
expectAPIToken: "abc123",
expectBody: "foo",
- expectStorageClass: "",
+ expectStorageClass: "default",
returnStorageClasses: "",
handled: make(chan string, 4),
}
expectPath: hash,
expectAPIToken: "abc123",
expectBody: "foo",
- expectStorageClass: "",
+ expectStorageClass: "default",
returnStorageClasses: "",
handled: make(chan string, 1),
}
expectPath: hash,
expectAPIToken: "abc123",
expectBody: "foo",
- expectStorageClass: "",
+ expectStorageClass: "default",
returnStorageClasses: "",
handled: make(chan string, 5),
}
expectPath: Md5String("foo"),
expectAPIToken: "abc123",
expectBody: "foo",
- expectStorageClass: "",
+ expectStorageClass: "default",
returnStorageClasses: "",
handled: make(chan string, 5),
},
req.Hash = fmt.Sprintf("%x", m.Sum(nil))
}
if req.StorageClasses == nil {
- req.StorageClasses = kc.StorageClasses
+ if len(kc.StorageClasses) > 0 {
+ req.StorageClasses = kc.StorageClasses
+ } else {
+ req.StorageClasses = kc.DefaultStorageClasses
+ }
}
if req.Replicas == 0 {
req.Replicas = kc.Want_replicas
:storage_classes_desired:
A list of storage class names where to upload the data. If None,
- the keepstores are expected to store the data into their default
- storage class.
+ the keep client is expected to store the data into the cluster's
+ default storage class(es).
"""
self.hits_counter = Counter()
self.misses_counter = Counter()
self._storage_classes_unsupported_warning = False
+ self._default_classes = []
if local_store:
self.local_store = local_store
self._writable_services = None
self.using_proxy = None
self._static_services_list = False
+ try:
+ self._default_classes = [
+ k for k, v in self.api_client.config()['StorageClasses'].items() if v['Default']]
+ except KeyError:
+ # We're talking to an old cluster
+ pass
def current_timeout(self, attempt_number):
"""Return the appropriate timeout to use for this client.
"failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service")
@retry.retry_method
- def put(self, data, copies=2, num_retries=None, request_id=None, classes=[]):
+ def put(self, data, copies=2, num_retries=None, request_id=None, classes=None):
"""Save data in Keep.
This method will get a list of Keep services from the API server, and
be written.
"""
+ classes = classes or self._default_classes
+
if not isinstance(data, bytes):
data = data.encode()
class ApiClientMock(object):
def api_client_mock(self):
- return mock.MagicMock(name='api_client_mock')
+ api_mock = mock.MagicMock(name='api_client_mock')
+ api_mock.config.return_value = {
+ 'StorageClasses': {
+ 'default': {'Default': True}
+ }
+ }
+ return api_mock
def mock_keep_services(self, api_mock=None, status=200, count=12,
service_type='disk',
self.data = b'xyzzy'
self.locator = '1271ed5ef305aadabc605b1609e24c52'
+    def test_multiple_default_storage_classes_req_header(self):
+        # When the cluster config marks several storage classes as
+        # Default, a put() with no explicit classes should request all
+        # of them ('baz' is excluded: Default is False).
+        api_mock = self.api_client_mock()
+        api_mock.config.return_value = {
+            'StorageClasses': {
+                'foo': { 'Default': True },
+                'bar': { 'Default': True },
+                'baz': { 'Default': False }
+            }
+        }
+        api_client = self.mock_keep_services(api_mock=api_mock, count=2)
+        keep_client = arvados.KeepClient(api_client=api_client)
+        resp_hdr = {
+            'x-keep-storage-classes-confirmed': 'foo=1, bar=1',
+            'x-keep-replicas-stored': 1
+        }
+        with tutil.mock_keep_responses(self.locator, 200, **resp_hdr) as mock:
+            keep_client.put(self.data, copies=1)
+            req_hdr = mock.responses[0]
+            # Expected header lists 'bar' before 'foo' — presumably the
+            # class names are sorted; verify against the client code.
+            self.assertIn(
+                'X-Keep-Storage-Classes: bar, foo', req_hdr.getopt(pycurl.HTTPHEADER))
+
def test_storage_classes_req_header(self):
+ self.assertEqual(
+ self.api_client.config()['StorageClasses'],
+ {'default': {'Default': True}})
cases = [
# requested, expected
[['foo'], 'X-Keep-Storage-Classes: foo'],
[['bar', 'foo'], 'X-Keep-Storage-Classes: bar, foo'],
- [[], None],
+ [[], 'X-Keep-Storage-Classes: default'],
+ [None, 'X-Keep-Storage-Classes: default'],
]
for req_classes, expected_header in cases:
headers = {'x-keep-replicas-stored': 1}
- if len(req_classes) > 0:
+ if req_classes is None or len(req_classes) == 0:
+ confirmed_hdr = 'default=1'
+ elif len(req_classes) > 0:
confirmed_hdr = ', '.join(["{}=1".format(cls) for cls in req_classes])
- headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
+ headers.update({'x-keep-storage-classes-confirmed': confirmed_hdr})
with tutil.mock_keep_responses(self.locator, 200, **headers) as mock:
self.keep_client.put(self.data, copies=1, classes=req_classes)
- resp = mock.responses[0]
- if expected_header is not None:
- self.assertIn(expected_header, resp.getopt(pycurl.HTTPHEADER))
- else:
- for hdr in resp.getopt(pycurl.HTTPHEADER):
- self.assertNotRegex(hdr, r'^X-Keep-Storage-Classes.*')
+ req_hdr = mock.responses[0]
+ self.assertIn(expected_header, req_hdr.getopt(pycurl.HTTPHEADER))
def test_partial_storage_classes_put(self):
headers = {
return "abc"
elif r == "insecure":
return False
+ elif r == "config":
+ return lambda: {}
else:
raise arvados.errors.KeepReadError()
keep_client = arvados.KeepClient(api_client=ApiMock(),
include_old_versions: params[:include_old_versions],
}
- # It matters which Collection object we pick because we use it to get signed_manifest_text,
- # the value of which is affected by the value of trash_at.
+ # It matters which Collection object we pick because blob
+ # signatures depend on the value of trash_at.
#
- # From postgres doc: "By default, null values sort as if larger than any non-null
- # value; that is, NULLS FIRST is the default for DESC order, and
- # NULLS LAST otherwise."
+ # From postgres doc: "By default, null values sort as if larger
+ # than any non-null value; that is, NULLS FIRST is the default
+ # for DESC order, and NULLS LAST otherwise."
#
# "trash_at desc" sorts null first, then latest to earliest, so
# it will select the Collection object with the longest
@object = {
uuid: c.portable_data_hash,
portable_data_hash: c.portable_data_hash,
- manifest_text: c.signed_manifest_text,
+ manifest_text: c.manifest_text,
}
end
else
end
end
if needupdate.length > 0
- u.update_attributes!(needupdate)
+ begin
+ u.update_attributes!(needupdate)
+ rescue ActiveRecord::RecordInvalid
+ loginCluster = Rails.configuration.Login.LoginCluster
+ if u.uuid[0..4] == loginCluster && !needupdate[:username].nil?
+ local_user = User.find_by_username(needupdate[:username])
+ # A cached user record from the LoginCluster is stale, reset its username
+ # and retry the update operation.
+ if local_user.andand.uuid[0..4] == loginCluster && local_user.uuid != u.uuid
+ Rails.logger.warn("cached username '#{needupdate[:username]}' collision with user '#{local_user.uuid}' - resetting")
+ local_user.update_attributes!({username: nil})
+ retry
+ end
+ end
+ raise # Not the issue we're handling above
+ end
end
@objects << u
end
user.last_name = "from cluster #{remote_user_prefix}"
end
- user.save!
+ begin
+ user.save!
+ rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique
+ Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...")
+ # Some other request won the race: retry fetching the user record.
+ user = User.find_by_uuid(remote_user['uuid'])
+ if !user
+ Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}")
+ return nil
+ end
+ end
if user.is_invited && !remote_user['is_invited']
# Remote user is not "invited" state, they should be unsetup, which
exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh,
remote_token.andand['expires_at']].compact.min
scopes = remote_token.andand['scopes'] || ['all']
- auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
- auth.user = user
- auth.api_token = stored_secret
- auth.api_client_id = 0
- auth.scopes = scopes
- auth.expires_at = exp
+ begin
+ retries ||= 0
+ auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
+ auth.user = user
+ auth.api_token = stored_secret
+ auth.api_client_id = 0
+ auth.scopes = scopes
+ auth.expires_at = exp
+ end
+ rescue ActiveRecord::RecordNotUnique
+ Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...")
+ # Some other request won the race: retry just once before erroring out
+ if (retries += 1) <= 1
+ retry
+ else
+ Rails.logger.warn("cannot find or create cached remote token #{token_uuid}")
+ return nil
+ end
end
auth.update_attributes!(user: user,
api_token: stored_secret,
t.add :description
t.add :properties
t.add :portable_data_hash
- t.add :signed_manifest_text, as: :manifest_text
t.add :manifest_text, as: :unsigned_manifest_text
+ t.add :manifest_text, as: :manifest_text
t.add :replication_desired
t.add :replication_confirmed
t.add :replication_confirmed_at
def self.attributes_required_columns
super.merge(
- # If we don't list manifest_text explicitly, the
- # params[:select] code gets confused by the way we
- # expose signed_manifest_text as manifest_text in the
- # API response, and never let clients select the
- # manifest_text column.
- #
- # We need trash_at and is_trashed to determine the
- # correct timestamp in signed_manifest_text.
- 'manifest_text' => ['manifest_text', 'trash_at', 'is_trashed'],
+ # If we don't list unsigned_manifest_text explicitly,
+ # the params[:select] code gets confused by the way we
+ # expose manifest_text as unsigned_manifest_text in
+ # the API response, and never let clients select the
+ # unsigned_manifest_text column.
'unsigned_manifest_text' => ['manifest_text'],
'name' => ['name'],
)
end
end
- def signed_manifest_text
+ def signed_manifest_text_only_for_tests
if !has_attribute? :manifest_text
return nil
elsif is_trashed
token = Thread.current[:token]
exp = [db_current_time.to_i + Rails.configuration.Collections.BlobSigningTTL.to_i,
trash_at].compact.map(&:to_i).min
- self.class.sign_manifest manifest_text, token, exp
+ self.class.sign_manifest_only_for_tests manifest_text, token, exp
end
end
- def self.sign_manifest manifest, token, exp=nil
+ def self.sign_manifest_only_for_tests manifest, token, exp=nil
if exp.nil?
exp = db_current_time.to_i + Rails.configuration.Collections.BlobSigningTTL.to_i
end
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class DropFtsIndex < ActiveRecord::Migration[5.2]
+ # Table name => name of the full-text-search (tsvector GIN) index
+ # being retired for that table.
+ def fts_indexes
+ {
+ "collections" => "collections_full_text_search_idx",
+ "container_requests" => "container_requests_full_text_search_idx",
+ "groups" => "groups_full_text_search_idx",
+ "jobs" => "jobs_full_text_search_idx",
+ "pipeline_instances" => "pipeline_instances_full_text_search_idx",
+ "pipeline_templates" => "pipeline_templates_full_text_search_idx",
+ "workflows" => "workflows_full_text_search_idx",
+ }
+ end
+
+ # Drop every FTS index. "IF EXISTS" makes the migration safe to
+ # re-run on databases where some indexes are already gone.
+ def up
+ fts_indexes.keys.each do |t|
+ i = fts_indexes[t]
+ execute "DROP INDEX IF EXISTS #{i}"
+ end
+ end
+
+ # Rebuild each index from the model's full_text_tsvector expression
+ # (t.classify.constantize maps the table name back to its model).
+ def down
+ fts_indexes.keys.each do |t|
+ i = fts_indexes[t]
+ execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector})"
+ end
+ end
+end
SET default_with_oids = false;
---
--- Name: groups; Type: TABLE; Schema: public; Owner: -
---
-
-CREATE TABLE public.groups (
- id integer NOT NULL,
- uuid character varying(255),
- owner_uuid character varying(255),
- created_at timestamp without time zone NOT NULL,
- modified_by_client_uuid character varying(255),
- modified_by_user_uuid character varying(255),
- modified_at timestamp without time zone,
- name character varying(255) NOT NULL,
- description character varying(524288),
- updated_at timestamp without time zone NOT NULL,
- group_class character varying(255),
- trash_at timestamp without time zone,
- is_trashed boolean DEFAULT false NOT NULL,
- delete_at timestamp without time zone,
- properties jsonb DEFAULT '{}'::jsonb
-);
-
-
--
-- Name: api_client_authorizations; Type: TABLE; Schema: public; Owner: -
--
ALTER SEQUENCE public.containers_id_seq OWNED BY public.containers.id;
+--
+-- Name: groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.groups (
+ id integer NOT NULL,
+ uuid character varying(255),
+ owner_uuid character varying(255),
+ created_at timestamp without time zone NOT NULL,
+ modified_by_client_uuid character varying(255),
+ modified_by_user_uuid character varying(255),
+ modified_at timestamp without time zone,
+ name character varying(255) NOT NULL,
+ description character varying(524288),
+ updated_at timestamp without time zone NOT NULL,
+ group_class character varying(255),
+ trash_at timestamp without time zone,
+ is_trashed boolean DEFAULT false NOT NULL,
+ delete_at timestamp without time zone,
+ properties jsonb DEFAULT '{}'::jsonb
+);
+
+
--
-- Name: groups_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
CREATE INDEX collection_index_on_properties ON public.collections USING gin (properties);
---
--- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX collections_full_text_search_idx ON public.collections USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text)), 0, 1000000)));
-
-
--
-- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
---
--- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX container_requests_full_text_search_idx ON public.container_requests USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text), 0, 1000000)));
-
-
--
-- Name: container_requests_index_on_properties; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX group_index_on_properties ON public.groups USING gin (properties);
---
--- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX groups_full_text_search_idx ON public.groups USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)), 0, 1000000)));
-
-
--
-- Name: groups_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX job_tasks_search_index ON public.job_tasks USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, job_uuid, created_by_job_task_uuid);
---
--- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX jobs_full_text_search_idx ON public.jobs USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)), 0, 1000000)));
-
-
--
-- Name: jobs_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid);
---
--- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_instances_full_text_search_idx ON public.pipeline_instances USING gin (to_tsvector('english'::regconfig, substr((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
--
-- Name: pipeline_instances_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON public.pipeline_templates USING btree (owner_uuid, name);
---
--- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX pipeline_templates_full_text_search_idx ON public.pipeline_templates USING gin (to_tsvector('english'::regconfig, substr((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text), 0, 1000000)));
-
-
--
-- Name: pipeline_templates_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX virtual_machines_search_index ON public.virtual_machines USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, hostname);
---
--- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX workflows_full_text_search_idx ON public.workflows USING gin (to_tsvector('english'::regconfig, substr((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)), 0, 1000000)));
-
-
--
-- Name: workflows_search_idx; Type: INDEX; Schema: public; Owner: -
--
('20201202174753'),
('20210108033940'),
('20210126183521'),
-('20210621204455');
+('20210621204455'),
+('20210816191509');
model_table_name = model_class.table_name
filters.each do |filter|
attrs_in, operator, operand = filter
- if attrs_in == 'any' && operator != '@@'
+ if operator == '@@'
+ raise ArgumentError.new("Full text search operator is no longer supported")
+ end
+ if attrs_in == 'any'
attrs = model_class.searchable_columns(operator)
elsif attrs_in.is_a? Array
attrs = attrs_in
raise ArgumentError.new("Invalid operator '#{operator}' (#{operator.class}) in filter")
end
+ operator = operator.downcase
cond_out = []
- if attrs_in == 'any' && (operator.casecmp('ilike').zero? || operator.casecmp('like').zero?) && (operand.is_a? String) && operand.match('^[%].*[%]$')
+ if attrs_in == 'any' && (operator == 'ilike' || operator == 'like') && (operand.is_a? String) && operand.match('^[%].*[%]$')
# Trigram index search
cond_out << model_class.full_text_trgm + " #{operator} ?"
param_out << operand
attrs = []
end
- if operator == '@@'
- # Full-text search
- if attrs_in != 'any'
- raise ArgumentError.new("Full text search on individual columns is not supported")
- end
- if operand.is_a? Array
- raise ArgumentError.new("Full text search not supported for array operands")
- end
-
- # Skip the generic per-column operator loop below
- attrs = []
- # Use to_tsquery since plainto_tsquery does not support prefix
- # search. And, split operand and join the words with ' & '
- cond_out << model_class.full_text_tsvector+" @@ to_tsquery(?)"
- param_out << operand.split.join(' & ')
- end
attrs.each do |attr|
subproperty = attr.split(".", 2)
end
# jsonb search
- case operator.downcase
+ case operator
when '=', '!='
- not_in = if operator.downcase == "!=" then "NOT " else "" end
+ not_in = if operator == "!=" then "NOT " else "" end
cond_out << "#{not_in}(#{attr_table_name}.#{attr} @> ?::jsonb)"
param_out << SafeJSON.dump({proppath => operand})
when 'in'
else
raise ArgumentError.new("Invalid operator for subproperty search '#{operator}'")
end
- elsif operator.downcase == "exists"
+ elsif operator == "exists"
if col.type != :jsonb
raise ArgumentError.new("Invalid attribute '#{attr}' for operator '#{operator}' in filter")
end
cond_out << "jsonb_exists(#{attr_table_name}.#{attr}, ?)"
param_out << operand
else
- if !attr_model_class.searchable_columns(operator).index attr
+ if !attr_model_class.searchable_columns(operator).index(attr) &&
+ !(col.andand.type == :jsonb && ['contains', '=', '<>', '!='].index(operator))
raise ArgumentError.new("Invalid attribute '#{attr}' in filter")
end
- case operator.downcase
+ case operator
when '=', '<', '<=', '>', '>=', '!=', 'like', 'ilike'
attr_type = attr_model_class.attribute_column(attr).type
operator = '<>' if operator == '!='
end
end
cond_out << cond.join(' OR ')
+ when 'contains'
+ if col.andand.type != :jsonb
+ raise ArgumentError.new("Invalid attribute '#{attr}' for '#{operator}' operator")
+ end
+ if operand == []
+ raise ArgumentError.new("Invalid operand '#{operand.inspect}' for '#{operator}' operator")
+ end
+ operand = [operand] unless operand.is_a? Array
+ operand.each do |op|
+ if !op.is_a?(String)
+ raise ArgumentError.new("Invalid element #{operand.inspect} in operand for #{operator.inspect} operator (operand must be a string or array of strings)")
+ end
+ end
+ # We use jsonb_exists_all(a,b) instead of "a ?& b" because
+ # the pg gem thinks "?" is a bind var. And we use string
+ # interpolation instead of param_out because the pg gem
+ # flattens param_out / doesn't support passing arrays as
+ # bind vars.
+ q = operand.map { |s| ActiveRecord::Base.connection.quote(s) }.join(',')
+ cond_out << "jsonb_exists_all(#{attr_table_name}.#{attr}, array[#{q}])"
else
raise ArgumentError.new("Invalid operator '#{operator}'")
end
uuid: zzzzz-8i9sb-n7omg50bvt0m1nf
owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
- repository: active/foo
+ repository: active/bar
script: running_job_script
script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
state: Running
components_is_jobspec:
# Helps test that clients cope with funny-shaped components.
# For an example, see #3321.
- uuid: zzzzz-d1hrv-jobspeccomponts
- created_at: <%= 30.minute.ago.to_s(:db) %>
+ uuid: zzzzz-d1hrv-1yfj61234abcdk4
+ created_at: <%= 2.minute.ago.to_s(:db) %>
owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
- created_at: 2014-04-14 12:35:04 -0400
- updated_at: 2014-04-14 12:35:04 -0400
- modified_at: 2014-04-14 12:35:04 -0400
modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
state: RunningOnServer
end
end
- def assert_unsigned_manifest resp, label=''
- txt = resp['unsigned_manifest_text']
+ def assert_unsigned_manifest txt, label=''
assert_not_nil(txt, "#{label} unsigned_manifest_text was nil")
locs = 0
txt.scan(/ [[:xdigit:]]{32}\S*/) do |tok|
"past version not included on index")
end
- test "collections.get returns signed locators, and no unsigned_manifest_text" do
+ test "collections.get returns unsigned locators, and no unsigned_manifest_text" do
permit_unsigned_manifests
authorize_with :active
get :show, params: {id: collections(:foo_file).uuid}
assert_response :success
- assert_signed_manifest json_response['manifest_text'], 'foo_file'
+ assert_unsigned_manifest json_response["manifest_text"], 'foo_file'
refute_includes json_response, 'unsigned_manifest_text'
end
['v1token', 'v2token'].each do |token_method|
- test "correct signatures are given for #{token_method}" do
- token = api_client_authorizations(:active).send(token_method)
- authorize_with_token token
- get :show, params: {id: collections(:foo_file).uuid}
- assert_response :success
- assert_signed_manifest json_response['manifest_text'], 'foo_file', token: token
- end
-
test "signatures with #{token_method} are accepted" do
token = api_client_authorizations(:active).send(token_method)
signed = Blob.sign_locator(
},
}
assert_response :success
- assert_signed_manifest json_response['manifest_text'], 'updated', token: token
+ assert_unsigned_manifest json_response['manifest_text'], 'updated'
end
end
- test "index with manifest_text selected returns signed locators" do
+ test "index with manifest_text selected returns unsigned locators" do
columns = %w(uuid owner_uuid manifest_text)
authorize_with :active
get :index, params: {select: columns}
json_response["items"].each do |coll|
assert_equal(coll.keys - ['kind'], columns,
"Collections index did not respect selected columns")
- assert_signed_manifest coll['manifest_text'], coll['uuid']
+ assert_unsigned_manifest coll['manifest_text'], coll['uuid']
end
end
json_response["items"].each do |coll|
assert_equal(coll.keys - ['kind'], ['unsigned_manifest_text'],
"Collections index did not respect selected columns")
- locs += assert_unsigned_manifest coll, coll['uuid']
+ assert_nil coll['manifest_text']
+ locs += assert_unsigned_manifest coll['unsigned_manifest_text'], coll['uuid']
end
assert_operator locs, :>, 0, "no locators found in any manifests"
end
assert_not_nil assigns(:object)
resp = assigns(:object)
assert_equal foo_collection[:portable_data_hash], resp[:portable_data_hash]
- assert_signed_manifest resp[:manifest_text]
+ assert_unsigned_manifest resp[:manifest_text]
# The manifest in the response will have had permission hints added.
# Remove any permission hints in the response before comparing it to the source.
authorize_with :active
manifest_text = ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo.txt\n"
if !unsigned
- manifest_text = Collection.sign_manifest manifest_text, api_token(:active)
+ manifest_text = Collection.sign_manifest_only_for_tests manifest_text, api_token(:active)
end
post :create, params: {
collection: {
assert_not_nil assigns(:object)
resp = JSON.parse(@response.body)
assert_equal manifest_uuid, resp['portable_data_hash']
- # All of the locators in the output must be signed.
+ # All of the signatures in the output must be valid.
resp['manifest_text'].lines.each do |entry|
m = /([[:xdigit:]]{32}\+\S+)/.match(entry)
- if m
+ if m && m[0].index('+A')
assert Blob.verify_signature m[0], signing_opts
end
end
assert_not_nil assigns(:object)
resp = JSON.parse(@response.body)
assert_equal manifest_uuid, resp['portable_data_hash']
- # All of the locators in the output must be signed.
+ # All of the signatures in the output must be valid.
resp['manifest_text'].lines.each do |entry|
m = /([[:xdigit:]]{32}\+\S+)/.match(entry)
- if m
+ if m && m[0].index('+A')
assert Blob.verify_signature m[0], signing_opts
end
end
assert_equal manifest_text, stripped_manifest
end
- test "multiple signed locators per line" do
- permit_unsigned_manifests
- authorize_with :active
- locators = %w(
- d41d8cd98f00b204e9800998ecf8427e+0
- acbd18db4cc2f85cedef654fccc4a4d8+3
- ea10d51bcf88862dbcc36eb292017dfd+45)
-
- signing_opts = {
- key: Rails.configuration.Collections.BlobSigningKey,
- api_token: api_token(:active),
- }
-
- unsigned_manifest = [".", *locators, "0:0:foo.txt\n"].join(" ")
- manifest_uuid = Digest::MD5.hexdigest(unsigned_manifest) +
- '+' +
- unsigned_manifest.length.to_s
-
- signed_locators = locators.map { |loc| Blob.sign_locator loc, signing_opts }
- signed_manifest = [".", *signed_locators, "0:0:foo.txt\n"].join(" ")
-
- post :create, params: {
- collection: {
- manifest_text: signed_manifest,
- portable_data_hash: manifest_uuid,
- }
- }
- assert_response :success
- assert_not_nil assigns(:object)
- resp = JSON.parse(@response.body)
- assert_equal manifest_uuid, resp['portable_data_hash']
- # All of the locators in the output must be signed.
- # Each line is of the form "path locator locator ... 0:0:file.txt"
- # entry.split[1..-2] will yield just the tokens in the middle of the line
- returned_locator_count = 0
- resp['manifest_text'].lines.each do |entry|
- entry.split[1..-2].each do |tok|
- returned_locator_count += 1
- assert Blob.verify_signature tok, signing_opts
- end
- end
- assert_equal locators.count, returned_locator_count
- end
-
test 'Reject manifest with unsigned blob' do
permit_unsigned_manifests false
authorize_with :active
assert_response :success
assert_equal col.version, json_response['version'], 'Trashing a collection should not create a new version'
end
+
+ # Filtering on the jsonb storage-classes columns with '=' should match
+ # collections whose stored value equals the given JSON array exactly.
+ ["storage_classes_desired", "storage_classes_confirmed"].each do |attr|
+ test "filter collections by #{attr}" do
+ authorize_with(:active)
+ get :index, params: {
+ filters: [[attr, "=", '["default"]']]
+ }
+ assert_response :success
+ # At least one fixture must match, and every match must have
+ # exactly ["default"] in the filtered attribute.
+ assert_not_equal 0, json_response["items"].length
+ json_response["items"].each do |c|
+ assert_equal ["default"], c[attr]
+ end
+ end
+ end
end
json_response['errors'].join(' '))
end
- test 'error message for full text search on a specific column' do
+ # The '@@' full-text-search operator has been removed (along with the
+ # FTS indexes); any use of it should now fail with a 422 and a clear
+ # "no longer supported" message.
+ test 'error message for unsupported full text search' do
 @controller = Arvados::V1::CollectionsController.new
 authorize_with :active
 get :index, params: {
 filters: [['uuid', '@@', 'abcdef']],
 }
 assert_response 422
- assert_match(/not supported/, json_response['errors'].join(' '))
- end
-
- test 'difficult characters in full text search' do
- @controller = Arvados::V1::CollectionsController.new
- authorize_with :active
- get :index, params: {
- filters: [['any', '@@', 'a|b"c']],
- }
- assert_response :success
- # (Doesn't matter so much which results are returned.)
- end
-
- test 'array operand in full text search' do
- @controller = Arvados::V1::CollectionsController.new
- authorize_with :active
- get :index, params: {
- filters: [['any', '@@', ['abc', 'def']]],
- }
- assert_response 422
- assert_match(/not supported/, json_response['errors'].join(' '))
+ assert_match(/no longer supported/, json_response['errors'].join(' '))
 end
test 'api responses provide timestamps with nanoseconds' do
end
end
- test "full text search with count='none'" do
- @controller = Arvados::V1::GroupsController.new
- authorize_with :admin
-
- get :contents, params: {
- format: :json,
- count: 'none',
- limit: 1000,
- filters: [['any', '@@', Rails.configuration.ClusterID]],
- }
-
- assert_response :success
-
- all_objects = Hash.new(0)
- json_response['items'].map{|o| o['kind']}.each{|t| all_objects[t] += 1}
-
- assert_equal true, all_objects['arvados#group']>0
- assert_equal true, all_objects['arvados#job']>0
- assert_equal true, all_objects['arvados#pipelineInstance']>0
- assert_equal true, all_objects['arvados#pipelineTemplate']>0
-
- # Perform test again mimicking a second page request with:
- # last_object_class = PipelineInstance
- # and hence groups and jobs should not be included in the response
- # offset = 5, which means first 5 pipeline instances were already received in page 1
- # and hence the remaining pipeline instances and all other object types should be included in the response
-
- @test_counter = 0 # Reset executed action counter
-
- @controller = Arvados::V1::GroupsController.new
-
- get :contents, params: {
- format: :json,
- count: 'none',
- limit: 1000,
- offset: '5',
- last_object_class: 'PipelineInstance',
- filters: [['any', '@@', Rails.configuration.ClusterID]],
- }
-
- assert_response :success
-
- second_page = Hash.new(0)
- json_response['items'].map{|o| o['kind']}.each{|t| second_page[t] += 1}
-
- assert_equal false, second_page.include?('arvados#group')
- assert_equal false, second_page.include?('arvados#job')
- assert_equal true, second_page['arvados#pipelineInstance']>0
- assert_equal all_objects['arvados#pipelineInstance'], second_page['arvados#pipelineInstance']+5
- assert_equal true, second_page['arvados#pipelineTemplate']>0
- end
-
[['prop1', '=', 'value1', [:collection_with_prop1_value1], [:collection_with_prop1_value2, :collection_with_prop2_1]],
['prop1', '!=', 'value1', [:collection_with_prop1_value2, :collection_with_prop2_1], [:collection_with_prop1_value1]],
['prop1', 'exists', true, [:collection_with_prop1_value1, :collection_with_prop1_value2, :collection_with_prop1_value3, :collection_with_prop1_other1], [:collection_with_prop2_1]],
assert_includes(found, collections(:replication_desired_2_unconfirmed).uuid)
assert_includes(found, collections(:replication_desired_2_confirmed_2).uuid)
end
+
+ # Each case: [expected item count, operand for the "contains" filter].
+ # A nil count means the operand is invalid and the API must reject the
+ # filter with 422.
+ [
+ [1, "foo"],
+ [1, ["foo"]],
+ [1, ["bar"]],
+ [1, ["bar", "foo"]],
+ [0, ["foo", "qux"]],
+ [0, ["qux"]],
+ [nil, []],
+ [nil, [[]]],
+ [nil, [["bogus"]]],
+ [nil, [{"foo" => "bar"}]],
+ [nil, {"foo" => "bar"}],
+ ].each do |results, operand|
+ test "storage_classes_desired contains #{operand.inspect}" do
+ @controller = Arvados::V1::CollectionsController.new
+ authorize_with(:active)
+ # Create the one collection the valid operands are expected to match.
+ c = Collection.create!(
+ manifest_text: "",
+ storage_classes_desired: ["foo", "bar", "baz"])
+ get :index, params: {
+ filters: [["storage_classes_desired", "contains", operand]],
+ }
+ if results.nil?
+ assert_response 422
+ next
+ end
+ assert_response :success
+ assert_equal results, json_response["items"].length
+ if results > 0
+ assert_equal c.uuid, json_response["items"][0]["uuid"]
+ end
+ end
+ end
+
+ test "collections properties contains top level key" do
+ @controller = Arvados::V1::CollectionsController.new
+ authorize_with(:active)
+ get :index, params: {
+ filters: [["properties", "contains", "prop1"]],
+ }
+ assert_response :success
+ assert_not_empty json_response["items"]
+ json_response["items"].each do |c|
+ assert c["properties"].has_key?("prop1")
+ end
+ end
end
test "create collection, update manifest, and search with filename" do
# create collection
- signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
+ signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
post "/arvados/v1/collections",
params: {
format: :json,
search_using_filter 'my_test_file.txt', 1
# update the collection's manifest text
- signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_updated_test_file.txt\n", api_token(:active))
+ signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_updated_test_file.txt\n", api_token(:active))
put "/arvados/v1/collections/#{created['uuid']}",
params: {
format: :json,
end
end
- test "search collection using full text search" do
- # create collection to be searched for
- signed_manifest = Collection.sign_manifest(". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1_in_subdir3.txt 32:32:file2_in_subdir3.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3_in_subdir4.txt 32:32:file4_in_subdir4.txt\n", api_token(:active))
- post "/arvados/v1/collections",
- params: {
- format: :json,
- collection: {description: 'specific collection description', manifest_text: signed_manifest}.to_json,
- },
- headers: auth(:active)
- assert_response :success
- assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
-
- # search using the filename
- search_using_full_text_search 'subdir2', 0
- search_using_full_text_search 'subdir2:*', 1
- search_using_full_text_search 'subdir2/subdir3/subdir4', 1
- search_using_full_text_search 'file4:*', 1
- search_using_full_text_search 'file4_in_subdir4.txt', 1
- search_using_full_text_search 'subdir2 file4:*', 0 # first word is incomplete
- search_using_full_text_search 'subdir2/subdir3/subdir4 file4:*', 1
- search_using_full_text_search 'subdir2/subdir3/subdir4 file4_in_subdir4.txt', 1
- search_using_full_text_search 'ile4', 0 # not a prefix match
- end
-
- def search_using_full_text_search search_filter, expected_items
- get '/arvados/v1/collections',
- params: {:filters => [['any', '@@', search_filter]].to_json},
- headers: auth(:active)
- assert_response :success
- response_items = json_response['items']
- assert_not_nil response_items
- if expected_items == 0
- assert_empty response_items
- else
- refute_empty response_items
- first_item = response_items.first
- assert_not_nil first_item
- end
- end
-
- # search for the filename in the file_names column and expect error
- test "full text search not supported for individual columns" do
- get '/arvados/v1/collections',
- params: {:filters => [['name', '@@', 'General']].to_json},
- headers: auth(:active)
- assert_response 422
- end
-
- [
- 'quick fox',
- 'quick_brown fox',
- 'brown_ fox',
- 'fox dogs',
- ].each do |search_filter|
- test "full text search ignores special characters and finds with filter #{search_filter}" do
- # description: The quick_brown_fox jumps over the lazy_dog
- # full text search treats '_' as space apparently
- get '/arvados/v1/collections',
- params: {:filters => [['any', '@@', search_filter]].to_json},
- headers: auth(:active)
- assert_response 200
- response_items = json_response['items']
- assert_not_nil response_items
- first_item = response_items.first
- refute_empty first_item
- assert_equal first_item['description'], 'The quick_brown_fox jumps over the lazy_dog'
- end
- end
-
test "create and get collection with properties" do
# create collection to be searched for
- signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
+ signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
post "/arvados/v1/collections",
params: {
format: :json,
test "create collection and update it with json encoded hash properties" do
# create collection to be searched for
- signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
+ signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
post "/arvados/v1/collections",
params: {
format: :json,
Rails.configuration.Collections.CollectionVersioning = true
Rails.configuration.Collections.PreserveVersionIfIdle = -1 # Disable auto versioning
- signed_manifest = Collection.sign_manifest(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
+ signed_manifest = Collection.sign_manifest_only_for_tests(". bad42fa702ae3ea7d888fef11b46f450+44 0:44:my_test_file.txt\n", api_token(:active))
post "/arvados/v1/collections",
params: {
format: :json,
end
end
- [
- ['Collection_', true], # collections and pipelines templates
- ['hash', true], # pipeline templates
- ['fa7aeb5140e2848d39b', false], # script_parameter of pipeline instances
- ['fa7aeb5140e2848d39b:*', true], # script_parameter of pipeline instances
- ['project pipeline', true], # finds "Completed pipeline in A Project"
- ['project pipeli:*', true], # finds "Completed pipeline in A Project"
- ['proje pipeli:*', false], # first word is incomplete, so no prefix match
- ['no-such-thing', false], # script_parameter of pipeline instances
- ].each do |search_filter, expect_results|
- test "full text search of group-owned objects for #{search_filter}" do
- get "/arvados/v1/groups/contents",
- params: {
- id: groups(:aproject).uuid,
- limit: 5,
- :filters => [['any', '@@', search_filter]].to_json
- },
- headers: auth(:active)
- assert_response :success
- if expect_results
- refute_empty json_response['items']
- json_response['items'].each do |item|
- assert item['uuid']
- assert_equal groups(:aproject).uuid, item['owner_uuid']
- end
- else
- assert_empty json_response['items']
- end
- end
- end
-
- test "full text search is not supported for individual columns" do
- get "/arvados/v1/groups/contents",
- params: {
- :filters => [['name', '@@', 'Private']].to_json
- },
- headers: auth(:active)
- assert_response 422
- end
-
test "group contents with include trash collections" do
get "/arvados/v1/groups/contents",
params: {
end
end
- test "full text search index exists on models" do
- indexes = {}
- conn = ActiveRecord::Base.connection
- conn.exec_query("SELECT i.relname as indname,
- i.relowner as indowner,
- idx.indrelid::regclass::text as table,
- am.amname as indam,
- idx.indkey,
- ARRAY(
- SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
- FROM generate_subscripts(idx.indkey, 1) as k
- ORDER BY k
- ) as keys,
- idx.indexprs IS NOT NULL as indexprs,
- idx.indpred IS NOT NULL as indpred
- FROM pg_index as idx
- JOIN pg_class as i
- ON i.oid = idx.indexrelid
- JOIN pg_am as am
- ON i.relam = am.oid
- JOIN pg_namespace as ns
- ON ns.oid = i.relnamespace
- AND ns.nspname = ANY(current_schemas(false))").each do |idx|
- if idx['keys'].match(/to_tsvector/)
- indexes[idx['table']] ||= []
- indexes[idx['table']] << idx
- end
- end
- fts_tables = ["collections", "container_requests", "groups", "jobs",
- "pipeline_instances", "pipeline_templates", "workflows"]
- fts_tables.each do |table|
- table_class = table.classify.constantize
- if table_class.respond_to?('full_text_searchable_columns')
- expect = table_class.full_text_searchable_columns
- ok = false
- indexes[table].andand.each do |idx|
- if expect == idx['keys'].scan(/COALESCE\(([A-Za-z_]+)/).flatten
- ok = true
- end
- end
- assert ok, "#{table} has no full-text index\nexpect: #{expect.inspect}\nfound: #{indexes[table].inspect}"
- end
- end
- end
-
[
%w[collections collections_trgm_text_search_idx],
%w[container_requests container_requests_trgm_text_search_idx],
c = time_block 'read' do
Collection.find_by_uuid(c.uuid)
end
- time_block 'sign' do
- c.signed_manifest_text
- end
- time_block 'sign + render' do
+ time_block 'render' do
c.as_api_response(nil)
end
loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3',
test "clear replication_confirmed* when introducing a new block in manifest" do
c = collections(:replication_desired_2_confirmed_2)
act_as_user users(:active) do
- assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text)
+ assert c.update_attributes(manifest_text: collections(:user_agreement).signed_manifest_text_only_for_tests)
assert_nil c.replication_confirmed
assert_nil c.replication_confirmed_at
end
test "don't clear replication_confirmed* when just renaming a file" do
c = collections(:replication_desired_2_confirmed_2)
act_as_user users(:active) do
- new_manifest = c.signed_manifest_text.sub(':bar', ':foo')
+ new_manifest = c.signed_manifest_text_only_for_tests.sub(':bar', ':foo')
assert c.update_attributes(manifest_text: new_manifest)
assert_equal 2, c.replication_confirmed
assert_not_nil c.replication_confirmed_at
test "don't clear replication_confirmed* when just deleting a data block" do
c = collections(:replication_desired_2_confirmed_2)
act_as_user users(:active) do
- new_manifest = c.signed_manifest_text
+ new_manifest = c.signed_manifest_text_only_for_tests
new_manifest.sub!(/ \S+:bar/, '')
new_manifest.sub!(/ acbd\S+/, '')
# Confirm that we did just remove a block from the manifest (if
# not, this test would pass without testing the relevant case):
- assert_operator new_manifest.length+40, :<, c.signed_manifest_text.length
+ assert_operator new_manifest.length+40, :<, c.signed_manifest_text_only_for_tests.length
assert c.update_attributes(manifest_text: new_manifest)
assert_equal 2, c.replication_confirmed
c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
c.update_attributes! trash_at: (t0 + 1.hours)
c.reload
- sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
+ sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text_only_for_tests)[1].to_i
assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i
end
end
c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
name: 'foo',
trash_at: db_current_time + 1.years)
- sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
+ sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text_only_for_tests)[1].to_i
expect_max_sig_exp = db_current_time.to_i + Rails.configuration.Collections.BlobSigningTTL.to_i
assert_operator c.trash_at.to_i, :>, expect_max_sig_exp
assert_operator sig_exp.to_i, :<=, expect_max_sig_exp
cluster *arvados.Cluster
}
-func (s *AuthHandlerSuite) SetUpSuite(c *check.C) {
- arvadostest.StartAPI()
-}
-
-func (s *AuthHandlerSuite) TearDownSuite(c *check.C) {
- arvadostest.StopAPI()
-}
-
func (s *AuthHandlerSuite) SetUpTest(c *check.C) {
arvadostest.ResetEnv()
repoRoot, err := filepath.Abs("../api/tmp/git/test")
cluster *arvados.Cluster
}
-func (s *IntegrationSuite) SetUpSuite(c *check.C) {
- arvadostest.StartAPI()
-}
-
-func (s *IntegrationSuite) TearDownSuite(c *check.C) {
- arvadostest.StopAPI()
-}
-
func (s *IntegrationSuite) SetUpTest(c *check.C) {
arvadostest.ResetEnv()
func (s *TestSuite) SetUpSuite(c *C) {
initialArgs = os.Args
- arvadostest.StartAPI()
runningCmds = make(map[string]*exec.Cmd)
logrus.SetFormatter(&logrus.TextFormatter{DisableColors: true})
}
-func (s *TestSuite) TearDownSuite(c *C) {
- arvadostest.StopAPI()
-}
-
func (s *TestSuite) SetUpTest(c *C) {
args := []string{"crunch-dispatch-local"}
os.Args = args
}
func (s *IntegrationSuite) SetUpTest(c *C) {
- arvadostest.StartAPI()
+ arvadostest.ResetEnv()
+ arvadostest.ResetDB(c)
os.Setenv("ARVADOS_API_TOKEN", arvadostest.Dispatch1Token)
s.disp = Dispatcher{}
s.disp.cluster = &arvados.Cluster{}
func (s *IntegrationSuite) TearDownTest(c *C) {
arvadostest.ResetEnv()
- arvadostest.StopAPI()
+ arvadostest.ResetDB(c)
}
type slurmFake struct {
c.Skip("-short")
}
arvadostest.ResetEnv()
- arvadostest.StartAPI()
arvadostest.StartKeep(4, true)
arv, err := arvadosclient.MakeArvadosClient()
c.Skip("-short")
}
arvadostest.StopKeep(4)
- arvadostest.StopAPI()
}
func (s *integrationSuite) SetUpTest(c *check.C) {
}
func (s *IntegrationSuite) SetUpSuite(c *check.C) {
- arvadostest.StartAPI()
+ arvadostest.ResetDB(c)
arvadostest.StartKeep(2, true)
arv, err := arvadosclient.MakeArvadosClient()
func (s *IntegrationSuite) TearDownSuite(c *check.C) {
arvadostest.StopKeep(2)
- arvadostest.StopAPI()
}
func (s *IntegrationSuite) SetUpTest(c *check.C) {
}
func (s *ServerRequiredSuite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
arvadostest.StartKeep(2, false)
}
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
arvadostest.StopKeep(2)
- arvadostest.StopAPI()
}
func (s *ServerRequiredConfigYmlSuite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
// config.yml defines 4 keepstores
arvadostest.StartKeep(4, false)
}
func (s *ServerRequiredConfigYmlSuite) TearDownSuite(c *C) {
arvadostest.StopKeep(4)
- arvadostest.StopAPI()
}
func (s *NoKeepServerSuite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
// We need API to have some keep services listed, but the
// services themselves should be unresponsive.
arvadostest.StartKeep(2, false)
arvadostest.ResetEnv()
}
-func (s *NoKeepServerSuite) TearDownSuite(c *C) {
- arvadostest.StopAPI()
-}
-
func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool, kp *arvados.UploadDownloadRolePermissions) (*keepclient.KeepClient, *bytes.Buffer) {
cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
c.Assert(err, Equals, nil)
"os"
"sort"
"strings"
+ "sync/atomic"
"time"
"git.arvados.org/arvados.git/lib/config"
}
}
+// A PUT must fail with FullError's HTTP status, without calling Put on
+// any volume, when every volume offering the requested storage class is
+// read-only.
+func (s *HandlerSuite) TestPutWithNoWritableVolumes(c *check.C) {
+ s.cluster.Volumes = map[string]arvados.Volume{
+ "zzzzz-nyw5e-111111111111111": {
+ Driver: "mock",
+ Replication: 1,
+ ReadOnly: true,
+ StorageClasses: map[string]bool{"class1": true}},
+ }
+ c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+ resp := IssueRequest(s.handler,
+ &RequestTester{
+ method: "PUT",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
+ storageClasses: "class1",
+ })
+ c.Check(resp.Code, check.Equals, FullError.HTTPCode)
+ // The read-only volume must never have been written to.
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, 0)
+}
+
+// Two consecutive PUTs of the same block, with various desired storage
+// classes and volume-rotation offsets, must produce exactly the
+// expected per-volume Put and Compare call counts. Volume 111 offers
+// class1, 121 offers class1+class2, 222 offers class2.
+func (s *HandlerSuite) TestConcurrentWritesToMultipleStorageClasses(c *check.C) {
+ s.cluster.Volumes = map[string]arvados.Volume{
+ "zzzzz-nyw5e-111111111111111": {
+ Driver: "mock",
+ Replication: 1,
+ StorageClasses: map[string]bool{"class1": true}},
+ "zzzzz-nyw5e-121212121212121": {
+ Driver: "mock",
+ Replication: 1,
+ StorageClasses: map[string]bool{"class1": true, "class2": true}},
+ "zzzzz-nyw5e-222222222222222": {
+ Driver: "mock",
+ Replication: 1,
+ StorageClasses: map[string]bool{"class2": true}},
+ }
+
+ for _, trial := range []struct {
+ setCounter uint32 // value to stuff vm.counter, to control offset
+ classes string // desired classes
+ put111 int // expected number of "put" ops on 11111... after 2x put reqs
+ put121 int // expected number of "put" ops on 12121...
+ put222 int // expected number of "put" ops on 22222...
+ cmp111 int // expected number of "compare" ops on 11111... after 2x put reqs
+ cmp121 int // expected number of "compare" ops on 12121...
+ cmp222 int // expected number of "compare" ops on 22222...
+ }{
+ {0, "class1",
+ 1, 0, 0,
+ 2, 1, 0}, // first put compares on all vols with class1; second put succeeds after checking 111
+ {0, "class2",
+ 0, 1, 0,
+ 0, 2, 1}, // first put compares on all vols with class2; second put succeeds after checking 121
+ {0, "class1,class2",
+ 1, 1, 0,
+ 2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+ {1, "class1,class2",
+ 0, 1, 0, // vm.counter offset is 1 so the first volume attempted is 121
+ 2, 2, 1}, // first put compares on all vols; second put succeeds after checking 111 and 121
+ {0, "class1,class2,class404",
+ 1, 1, 0,
+ 2, 2, 1}, // first put compares on all vols; second put doesn't compare on 222 because it already satisfied class2 on 121
+ } {
+ c.Logf("%+v", trial)
+ s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+ "class1": {},
+ "class2": {},
+ "class3": {},
+ }
+ c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+ atomic.StoreUint32(&s.handler.volmgr.counter, trial.setCounter)
+ for i := 0; i < 2; i++ {
+ IssueRequest(s.handler,
+ &RequestTester{
+ method: "PUT",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
+ storageClasses: trial.classes,
+ })
+ }
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put111)
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put121)
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Put"), check.Equals, trial.put222)
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp111)
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-121212121212121"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp121)
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Compare"), check.Equals, trial.cmp222)
+ }
+}
+
// Test TOUCH requests.
func (s *HandlerSuite) TestTouchHandler(c *check.C) {
c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
"strconv"
"strings"
"sync"
+ "sync/atomic"
"time"
"git.arvados.org/arvados.git/sdk/go/arvados"
}
type putProgress struct {
+ classNeeded map[string]bool
classTodo map[string]bool
mountUsed map[*VolumeMount]bool
totalReplication int
func (pr *putProgress) Add(mnt *VolumeMount) {
if pr.mountUsed[mnt] {
- logrus.Warnf("BUG? superfluous extra write to mount %s", mnt)
+ logrus.Warnf("BUG? superfluous extra write to mount %s", mnt.UUID)
return
}
pr.mountUsed[mnt] = true
}
}
+// Sub reverts a previous Add for mnt: it un-marks the mount as used,
+// subtracts its replication contribution, and puts any storage class
+// the caller originally requested back on the todo list. Called when a
+// pending write to mnt fails.
+func (pr *putProgress) Sub(mnt *VolumeMount) {
+ if !pr.mountUsed[mnt] {
+ logrus.Warnf("BUG? Sub called with no prior matching Add: %s", mnt.UUID)
+ return
+ }
+ pr.mountUsed[mnt] = false
+ pr.totalReplication -= mnt.Replication
+ for class := range mnt.StorageClasses {
+ pr.classDone[class] -= mnt.Replication
+ // Only classes in the original request go back on the todo list;
+ // classes the mount happens to offer but nobody asked for do not.
+ if pr.classNeeded[class] {
+ pr.classTodo[class] = true
+ }
+ }
+}
+
func (pr *putProgress) Done() bool {
return len(pr.classTodo) == 0 && pr.totalReplication > 0
}
return false
}
-func newPutResult(classes []string) putProgress {
+// Copy returns an independent copy of pr: classTodo, classDone, and
+// mountUsed maps are duplicated so the copy can diverge from the
+// original. classNeeded is shared, not copied — presumably it is never
+// modified after construction (newPutProgress writes it, Sub only
+// reads it); confirm before mutating it elsewhere.
+func (pr *putProgress) Copy() *putProgress {
+ cp := putProgress{
+ classNeeded: pr.classNeeded,
+ classTodo: make(map[string]bool, len(pr.classTodo)),
+ classDone: make(map[string]int, len(pr.classDone)),
+ mountUsed: make(map[*VolumeMount]bool, len(pr.mountUsed)),
+ totalReplication: pr.totalReplication,
+ }
+ for k, v := range pr.classTodo {
+ cp.classTodo[k] = v
+ }
+ for k, v := range pr.classDone {
+ cp.classDone[k] = v
+ }
+ for k, v := range pr.mountUsed {
+ cp.mountUsed[k] = v
+ }
+ return &cp
+}
+
+// newPutProgress returns a putProgress tracking the given desired
+// storage classes. Empty-string class names are ignored; each real
+// class starts out both needed and todo.
+func newPutProgress(classes []string) putProgress {
pr := putProgress{
- classTodo: make(map[string]bool, len(classes)),
- classDone: map[string]int{},
- mountUsed: map[*VolumeMount]bool{},
+ classNeeded: make(map[string]bool, len(classes)),
+ classTodo: make(map[string]bool, len(classes)),
+ classDone: map[string]int{},
+ mountUsed: map[*VolumeMount]bool{},
}
for _, c := range classes {
if c != "" {
+ pr.classNeeded[c] = true
pr.classTodo[c] = true
}
}
return pr
}
-// PutBlock Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-// PutBlock(ctx, block, hash)
-// Stores the BLOCK (identified by the content id HASH) in Keep.
-//
-// The MD5 checksum of the block must be identical to the content id HASH.
-// If not, an error is returned.
+// PutBlock stores the given block on one or more volumes.
//
-// PutBlock stores the BLOCK on the first Keep volume with free space.
-// A failure code is returned to the user only if all volumes fail.
+// The MD5 checksum of the block must match the given hash.
//
-// On success, PutBlock returns nil.
-// On failure, it returns a KeepError with one of the following codes:
+// The block is written to each writable volume (ordered by priority
+// and then UUID, see volume.go) until at least one replica has been
+// stored in each of the requested storage classes.
//
-// 500 Collision
-// A different block with the same hash already exists on this
-// Keep server.
-// 422 MD5Fail
-// The MD5 hash of the BLOCK does not match the argument HASH.
-// 503 Full
-// There was not enough space left in any Keep volume to store
-// the object.
-// 500 Fail
-// The object could not be stored for some other reason (e.g.
-// all writes failed). The text of the error message should
-// provide as much detail as possible.
+// The returned error, if any, is a KeepError with one of the
+// following codes:
//
+// 500 Collision
+// A different block with the same hash already exists on this
+// Keep server.
+// 422 MD5Fail
+// The MD5 hash of the BLOCK does not match the argument HASH.
+// 503 Full
+// There was not enough space left in any Keep volume to store
+// the object.
+// 500 Fail
+// The object could not be stored for some other reason (e.g.
+// all writes failed). The text of the error message should
+// provide as much detail as possible.
func PutBlock(ctx context.Context, volmgr *RRVolumeManager, block []byte, hash string, wantStorageClasses []string) (putProgress, error) {
log := ctxlog.FromContext(ctx)
return putProgress{}, RequestHashError
}
- result := newPutResult(wantStorageClasses)
+ result := newPutProgress(wantStorageClasses)
// If we already have this data, it's intact on disk, and we
// can update its timestamp, return success. If we have
// different data with the same hash, return failure.
- if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil {
+ if err := CompareAndTouch(ctx, volmgr, hash, block, &result); err != nil || result.Done() {
return result, err
}
if ctx.Err() != nil {
return result, ErrClientDisconnect
}
- // Choose a Keep volume to write to.
- // If this volume fails, try all of the volumes in order.
- if mnt := volmgr.NextWritable(); mnt == nil || !result.Want(mnt) {
- // fall through to "try all volumes" below
- } else if err := mnt.Put(ctx, hash, block); err != nil {
- log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
- } else {
- result.Add(mnt)
- if result.Done() {
- return result, nil
- }
- }
- if ctx.Err() != nil {
- return putProgress{}, ErrClientDisconnect
- }
-
- writables := volmgr.AllWritable()
+ writables := volmgr.NextWritable()
if len(writables) == 0 {
log.Error("no writable volumes")
- return putProgress{}, FullError
+ return result, FullError
}
- allFull := true
+ var wg sync.WaitGroup
+ var mtx sync.Mutex
+ cond := sync.Cond{L: &mtx}
+ // pending predicts what result will be if all pending writes
+ // succeed.
+ pending := result.Copy()
+ var allFull atomic.Value
+ allFull.Store(true)
+
+ // We hold the lock for the duration of the "each volume" loop
+ // below, except when it is released during cond.Wait().
+ mtx.Lock()
+
for _, mnt := range writables {
+ // Wait until our decision to use this mount does not
+ // depend on the outcome of pending writes.
+ for result.Want(mnt) && !pending.Want(mnt) {
+ cond.Wait()
+ }
if !result.Want(mnt) {
continue
}
- err := mnt.Put(ctx, hash, block)
- if ctx.Err() != nil {
- return result, ErrClientDisconnect
- }
- switch err {
- case nil:
- result.Add(mnt)
- if result.Done() {
- return result, nil
+ mnt := mnt
+ pending.Add(mnt)
+ wg.Add(1)
+ go func() {
+ log.Debugf("PutBlock: start write to %s", mnt.UUID)
+ defer wg.Done()
+ err := mnt.Put(ctx, hash, block)
+
+ mtx.Lock()
+ if err != nil {
+ log.Debugf("PutBlock: write to %s failed", mnt.UUID)
+ pending.Sub(mnt)
+ } else {
+ log.Debugf("PutBlock: write to %s succeeded", mnt.UUID)
+ result.Add(mnt)
}
- continue
- case FullError:
- continue
- default:
- // The volume is not full but the
- // write did not succeed. Report the
- // error and continue trying.
- allFull = false
- log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
- }
+ cond.Broadcast()
+ mtx.Unlock()
+
+ if err != nil && err != FullError && ctx.Err() == nil {
+ // The volume is not full but the
+ // write did not succeed. Report the
+ // error and continue trying.
+ allFull.Store(false)
+ log.WithError(err).Errorf("%s: Put(%s) failed", mnt.Volume, hash)
+ }
+ }()
+ }
+ mtx.Unlock()
+ wg.Wait()
+ if ctx.Err() != nil {
+ return result, ErrClientDisconnect
+ }
+ if result.Done() {
+ return result, nil
}
if result.totalReplication > 0 {
// Some, but not all, of the storage classes were
// satisfied. This qualifies as success.
return result, nil
- } else if allFull {
+ } else if allFull.Load().(bool) {
log.Error("all volumes with qualifying storage classes are full")
return putProgress{}, FullError
} else {
}
pullRequest := s.setupPullWorkerIntegrationTest(c, testData, false)
- defer arvadostest.StopAPI()
defer arvadostest.StopKeep(2)
s.performPullWorkerIntegrationTest(testData, pullRequest, c)
}
pullRequest := s.setupPullWorkerIntegrationTest(c, testData, true)
- defer arvadostest.StopAPI()
defer arvadostest.StopKeep(2)
s.performPullWorkerIntegrationTest(testData, pullRequest, c)
vm.writables = append(vm.writables, mnt)
}
}
- // pri(i): return highest priority of any storage class
- // offered by vm.readables[i]
- pri := func(i int) int {
+ // pri(mnt): return highest priority of any storage class
+ // offered by mnt
+ pri := func(mnt *VolumeMount) int {
any, best := false, 0
- for class := range vm.readables[i].KeepMount.StorageClasses {
+ for class := range mnt.KeepMount.StorageClasses {
if p := cluster.StorageClasses[class].Priority; !any || best < p {
best = p
any = true
}
return best
}
- // sort vm.readables, first by highest priority of any offered
+ // less(a,b): sort first by highest priority of any offered
// storage class (highest->lowest), then by volume UUID
- sort.Slice(vm.readables, func(i, j int) bool {
- if pi, pj := pri(i), pri(j); pi != pj {
- return pi > pj
+ less := func(a, b *VolumeMount) bool {
+ if pa, pb := pri(a), pri(b); pa != pb {
+ return pa > pb
} else {
- return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID
+ return a.KeepMount.UUID < b.KeepMount.UUID
}
+ }
+ sort.Slice(vm.readables, func(i, j int) bool {
+ return less(vm.readables[i], vm.readables[j])
+ })
+ sort.Slice(vm.writables, func(i, j int) bool {
+ return less(vm.writables[i], vm.writables[j])
})
return vm, nil
}
return vm.readables
}
-// AllWritable returns an array of all writable volumes
+// AllWritable returns writable volumes, sorted by priority/uuid. Used
+// by CompareAndTouch to ensure higher-priority volumes are checked
+// first.
func (vm *RRVolumeManager) AllWritable() []*VolumeMount {
return vm.writables
}
-// NextWritable returns the next writable
-func (vm *RRVolumeManager) NextWritable() *VolumeMount {
+// NextWritable returns writable volumes, rotated by vm.counter so
+// each volume gets a turn to be first. Used by PutBlock to distribute
+// new data across available volumes.
+func (vm *RRVolumeManager) NextWritable() []*VolumeMount {
if len(vm.writables) == 0 {
return nil
}
- i := atomic.AddUint32(&vm.counter, 1)
- return vm.writables[i%uint32(len(vm.writables))]
+ offset := (int(atomic.AddUint32(&vm.counter, 1)) - 1) % len(vm.writables)
+ return append(append([]*VolumeMount(nil), vm.writables[offset:]...), vm.writables[:offset]...)
}
// VolumeStats returns an ioStats for the given volume.
# gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less
RUN apt-get update && \
apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
- gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less && \
+ gnupg2 runit python3-pip python3-setuptools python3-yaml shellinabox netcat less vim-tiny && \
apt-get clean
ENV GOPATH /var/lib/gopath
export VERSION=$(./version-at-commit.sh)
export BROWSER=none
export CI=true
+export HTTPS=false
node --version
exec node node_modules/react-scripts/scripts/start.js
var blobSignatureTTL = time.Duration(2*7*24) * time.Hour
-func (s *ServerRequiredSuite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
-}
-
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
- arvadostest.StopAPI()
arvadostest.ResetEnv()
}
type ServerRequiredSuite struct{}
-func (s *ServerRequiredSuite) SetUpSuite(c *C) {
- arvadostest.StartAPI()
-}
-
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
- arvadostest.StopAPI()
arvadostest.ResetEnv()
}
cp -vr /vagrant/tests /home/vagrant/tests;
sed 's#cluster_fixme_or_this_wont_work#harpo#g;
s#domain_fixme_or_this_wont_work#local#g;
- s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g;
+ s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g;
s#CONTROLLER_EXT_SSL_PORT=443#CONTROLLER_EXT_SSL_PORT=8443#g' \
/vagrant/local.params.example.single_host_multiple_hostnames > /tmp/local.params.single_host_multiple_hostnames"
arv.vm.provision "shell",
cp -vr /vagrant/tests /home/vagrant/tests;
sed 's#HOSTNAME_EXT=\"\"#HOSTNAME_EXT=\"zeppo.local\"#g;
s#cluster_fixme_or_this_wont_work#zeppo#g;
- s/#\ BRANCH=\"master\"/\ BRANCH=\"master\"/g;
+ s/#\ BRANCH=\"main\"/\ BRANCH=\"main\"/g;
s#domain_fixme_or_this_wont_work#local#g;' \
/vagrant/local.params.example.single_host_single_hostname > /tmp/local.params.single_host_single_hostname"
arv.vm.provision "shell",
+# -*- coding: utf-8 -*-
+# vim: ft=yaml
---
# Copyright (C) The Arvados Authors. All rights reserved.
#
## manage OS packages with some other tool and you don't want us messing up
## with your setup.
ruby:
+
## We set these to `true` here for testing purposes.
## They both default to `false`.
manage_ruby: true
host: 127.0.0.1
password: "__DATABASE_PASSWORD__"
user: __CLUSTER___arvados
- encoding: en_US.utf8
- client_encoding: UTF8
+ extra_conn_params:
+ client_encoding: UTF8
+ # Centos7 does not enable SSL by default, so we disable
+ # it here just for testing of the formula purposes only.
+ # You should not do this in production, and should
+ # configure Postgres certificates correctly
+ {%- if grains.os_family in ('RedHat',) %}
+ sslmode: disable
+ {%- endif %}
tls:
# certificate: ''
# required to test with arvados-snakeoil certs
insecure: true
+ resources:
+ virtual_machines:
+ shell:
+ name: webshell
+ backend: 127.0.1.1
+ port: 4200
+
### TOKENS
tokens:
system_root: __SYSTEM_ROOT_TOKEN__
#
# SPDX-License-Identifier: AGPL-3.0
+{%- if grains.os_family in ('RedHat',) %}
+ {%- set group = 'nginx' %}
+{%- else %}
+ {%- set group = 'www-data' %}
+{%- endif %}
+
### ARVADOS
arvados:
config:
- group: www-data
+ group: {{ group }}
### NGINX
nginx:
### SITES
servers:
managed:
- arvados_api:
+ arvados_api.conf:
enabled: true
overwrite: true
config:
servers:
managed:
### DEFAULT
- arvados_controller_default:
+ arvados_controller_default.conf:
enabled: true
overwrite: true
config:
- location /:
- return: '301 https://$host$request_uri'
- arvados_controller_ssl:
+ arvados_controller_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: __CLUSTER__.__DOMAIN__
- proxy_set_header: 'X-Real-IP $remote_addr'
- proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
- proxy_set_header: 'X-External-Client $external_client'
- - include: 'snippets/arvados-snakeoil.conf'
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/__CLUSTER__.__DOMAIN__.error.log
- client_max_body_size: 128m
servers:
managed:
### DEFAULT
- arvados_keepproxy_default:
+ arvados_keepproxy_default.conf:
enabled: true
overwrite: true
config:
- location /:
- return: '301 https://$host$request_uri'
- arvados_keepproxy_ssl:
+ arvados_keepproxy_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: keep.__CLUSTER__.__DOMAIN__
- client_max_body_size: 64M
- proxy_http_version: '1.1'
- proxy_request_buffering: 'off'
- - include: 'snippets/arvados-snakeoil.conf'
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/keepproxy.__CLUSTER__.__DOMAIN__.error.log
servers:
managed:
### DEFAULT
- arvados_collections_download_default:
+ arvados_collections_download_default.conf:
enabled: true
overwrite: true
config:
- return: '301 https://$host$request_uri'
### COLLECTIONS / DOWNLOAD
- arvados_collections_download_ssl:
+ arvados_collections_download_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: collections.__CLUSTER__.__DOMAIN__ download.__CLUSTER__.__DOMAIN__
- client_max_body_size: 0
- proxy_http_version: '1.1'
- proxy_request_buffering: 'off'
- - include: 'snippets/arvados-snakeoil.conf'
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/collections.__CLUSTER__.__DOMAIN__.error.log
#
# SPDX-License-Identifier: AGPL-3.0
+{%- set passenger_pkg = 'nginx-mod-http-passenger'
+    if grains.osfinger in ('CentOS Linux-7',) else
+ 'libnginx-mod-http-passenger' %}
+{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so'
+ if grains.osfinger in ('CentOS Linux-7',) else
+ '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %}
+{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby'
+ if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else
+ '/usr/bin/ruby' %}
+
### NGINX
nginx:
install_from_phusionpassenger: true
lookup:
- passenger_package: libnginx-mod-http-passenger
- passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf
+ passenger_package: {{ passenger_pkg }}
+ ### PASSENGER
+ passenger:
+ passenger_ruby: {{ passenger_ruby }}
### SERVER
server:
config:
- include: 'modules-enabled/*.conf'
+ # This is required to get the passenger module loaded
+ # In Debian it can be done with this
+ # include: 'modules-enabled/*.conf'
+ load_module: {{ passenger_mod }}
+
worker_processes: 4
+ ### SNIPPETS
+ snippets:
+ # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4
+ ssl_hardening_default.conf:
+ - ssl_session_timeout: 1d
+ - ssl_session_cache: 'shared:arvadosSSL:10m'
+ - ssl_session_tickets: 'off'
+
+ # intermediate configuration
+ - ssl_protocols: TLSv1.2 TLSv1.3
+ - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+ - ssl_prefer_server_ciphers: 'off'
+
+ # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+ - add_header: 'Strict-Transport-Security "max-age=63072000" always'
+
+ # OCSP stapling
+ # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+ # - ssl_stapling: 'on'
+ # - ssl_stapling_verify: 'on'
+
+ # verify chain of trust of OCSP response using Root CA and Intermediate certs
+ # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates
+
+ # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam
+ # - ssl_dhparam: /path/to/dhparam
+
+ # replace with the IP address of your resolver
+ # - resolver: 127.0.0.1
+
+ arvados-snakeoil.conf:
+ - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem
+ - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key
+
### SITES
servers:
managed:
#
# SPDX-License-Identifier: AGPL-3.0
+# This parameter will be used here to generate a list of upstreams and vhosts.
+# This dict is here for convenience and should be managed some other way, but the
+# different ways of orchestration that can be used for this are outside the scope
+# of this formula and their examples.
+# These upstreams should match those defined in `arvados:cluster:resources:virtual_machines`
+{% set webshell_virtual_machines = {
+ 'shell': {
+ 'name': 'webshell',
+ 'backend': '127.0.1.1',
+ 'port': 4200,
+ }
+}
+%}
+
### NGINX
nginx:
### SERVER
### STREAMS
http:
- upstream webshell_upstream:
- - server: 'shell.internal:4200 fail_timeout=10s'
+ {%- for vm, params in webshell_virtual_machines.items() %}
+ {%- set vm_name = params.name | default(vm) %}
+ {%- set vm_backend = params.backend | default(vm_name) %}
+ {%- set vm_port = params.port | default(4200) %}
+
+ upstream {{ vm_name }}_upstream:
+ - server: '{{ vm_backend }}:{{ vm_port }} fail_timeout=10s'
+
+ {%- endfor %}
### SITES
servers:
managed:
- arvados_webshell_default:
+ arvados_webshell_default.conf:
enabled: true
overwrite: true
config:
- location /:
- return: '301 https://$host$request_uri'
- arvados_webshell_ssl:
+ arvados_webshell_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: webshell.__CLUSTER__.__DOMAIN__
- listen:
- __CONTROLLER_EXT_SSL_PORT__ http2 ssl
- index: index.html index.htm
- - location /shell.__CLUSTER__.__DOMAIN__:
- - proxy_pass: 'http://webshell_upstream'
+ {%- for vm, params in webshell_virtual_machines.items() %}
+ {%- set vm_name = params.name | default(vm) %}
+ - location /{{ vm_name }}:
+ - proxy_pass: 'http://{{ vm_name }}_upstream'
- proxy_read_timeout: 90
- proxy_connect_timeout: 90
- proxy_set_header: 'Host $http_host'
- add_header: "'Access-Control-Allow-Origin' '*'"
- add_header: "'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'"
- add_header: "'Access-Control-Allow-Headers' 'DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type'"
-
- - include: 'snippets/arvados-snakeoil.conf'
+ {%- endfor %}
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/webshell.__CLUSTER__.__DOMAIN__.error.log
servers:
managed:
### DEFAULT
- arvados_websocket_default:
+ arvados_websocket_default.conf:
enabled: true
overwrite: true
config:
- location /:
- return: '301 https://$host$request_uri'
- arvados_websocket_ssl:
+ arvados_websocket_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: ws.__CLUSTER__.__DOMAIN__
- client_max_body_size: 64M
- proxy_http_version: '1.1'
- proxy_request_buffering: 'off'
- - include: 'snippets/arvados-snakeoil.conf'
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/ws.__CLUSTER__.__DOMAIN__.error.log
---
# Copyright (C) The Arvados Authors. All rights reserved.
#
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
+
+{%- if grains.os_family in ('RedHat',) %}
+ {%- set group = 'nginx' %}
+{%- else %}
+ {%- set group = 'www-data' %}
+{%- endif %}
### ARVADOS
arvados:
config:
- group: www-data
+ group: {{ group }}
### NGINX
nginx:
servers:
managed:
### DEFAULT
- arvados_workbench2_default:
+ arvados_workbench2_default.conf:
enabled: true
overwrite: true
config:
- location /:
- return: '301 https://$host$request_uri'
- arvados_workbench2_ssl:
+ arvados_workbench2_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: workbench2.__CLUSTER__.__DOMAIN__
- return: 503
- location /config.json:
- return: {{ "200 '" ~ '{"API_HOST":"__CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__"}' ~ "'" }}
- - include: 'snippets/arvados-snakeoil.conf'
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/workbench2.__CLUSTER__.__DOMAIN__.error.log
#
# SPDX-License-Identifier: AGPL-3.0
+{%- if grains.os_family in ('RedHat',) %}
+ {%- set group = 'nginx' %}
+{%- else %}
+ {%- set group = 'www-data' %}
+{%- endif %}
+
### ARVADOS
arvados:
config:
- group: www-data
+ group: {{ group }}
### NGINX
nginx:
servers:
managed:
### DEFAULT
- arvados_workbench_default:
+ arvados_workbench_default.conf:
enabled: true
overwrite: true
config:
- location /:
- return: '301 https://$host$request_uri'
- arvados_workbench_ssl:
+ arvados_workbench_ssl.conf:
enabled: true
overwrite: true
+ requires:
+ file: nginx_snippet_arvados-snakeoil.conf
config:
- server:
- server_name: workbench.__CLUSTER__.__DOMAIN__
- proxy_set_header: 'Host $http_host'
- proxy_set_header: 'X-Real-IP $remote_addr'
- proxy_set_header: 'X-Forwarded-For $proxy_add_x_forwarded_for'
- - include: 'snippets/arvados-snakeoil.conf'
+ - include: snippets/ssl_hardening_default.conf
+ - include: snippets/arvados-snakeoil.conf
- access_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.access.log combined
- error_log: /var/log/nginx/workbench.__CLUSTER__.__DOMAIN__.error.log
- arvados_workbench_upstream:
+ arvados_workbench_upstream.conf:
enabled: true
overwrite: true
config:
### POSTGRESQL
postgres:
- use_upstream_repo: false
+ # Centos-7's postgres package is too old, so we need to force using upstream's
+ # This is not required in Debian's family as they already ship with PG +11
+ {%- if salt['grains.get']('os_family') == 'RedHat' %}
+ use_upstream_repo: true
+ version: '12'
+
+ pkgs_deps:
+ - libicu
+ - libxslt
+ - systemd-sysv
+
+ pkgs_extra:
+ - postgresql12-contrib
+
+ {%- else %}
pkgs_extra:
- postgresql-contrib
+ {%- endif %}
postgresconf: |-
listen_addresses = '*' # listen on all interfaces
+ #ssl = on
+ #ssl_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem'
+ #ssl_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key'
acls:
- ['local', 'all', 'postgres', 'peer']
- ['local', 'all', 'all', 'peer']
# Copyright (C) The Arvados Authors. All rights reserved.
#
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
{%- set curr_tpldir = tpldir %}
{%- set tpldir = 'arvados' %}
{%- from "arvados/map.jinja" import arvados with context %}
{%- set tpldir = curr_tpldir %}
-{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %}
+include:
+ - nginx.passenger
+ - nginx.config
+ - nginx.service
+
+# Debian uses different dirs for certs and keys, but being a Snake Oil example,
+# we'll keep it simple here.
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %}
{%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %}
-{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %}
+{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %}
{%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %}
{%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %}
- ca-certificates
arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run:
- # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run
+ # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run
cmd.run:
- name: |
    # These dirs are not too CentOS-ish, but this is a helper script
- require:
- pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed
- cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run
+ # We need this before we can add the nginx's snippet
+ - require_in:
+ - file: nginx_snippet_arvados-snakeoil.conf
{%- if grains.get('os_family') == 'Debian' %}
arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed:
- sls: postgres
arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run:
- cmd.run:
- - name: |
- chown root:ssl-cert {{ arvados_key_file }}
+ file.managed:
+ - name: {{ arvados_key_file }}
+ - owner: root
+ - group: ssl-cert
- require:
- cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run
- pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed
-{%- endif %}
-
-arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed:
- file.managed:
- - name: /etc/nginx/snippets/arvados-snakeoil.conf
- - contents: |
- ssl_certificate {{ arvados_cert_file }};
- ssl_certificate_key {{ arvados_key_file }};
- - watch_in:
- - service: nginx_service
- - require:
- - pkg: passenger_install
- - cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run
- require_in:
- - file: nginx_config
- - service: nginx_service
- - watch_in:
- - service: nginx_service
-
-
+ - file: nginx_snippet_arvados-snakeoil.conf
+{%- endif %}
#
# SPDX-License-Identifier: AGPL-3.0
+{%- set passenger_pkg = 'nginx-mod-http-passenger'
+    if grains.osfinger in ('CentOS Linux-7',) else
+ 'libnginx-mod-http-passenger' %}
+{%- set passenger_mod = '/usr/lib64/nginx/modules/ngx_http_passenger_module.so'
+ if grains.osfinger in ('CentOS Linux-7',) else
+ '/usr/lib/nginx/modules/ngx_http_passenger_module.so' %}
+{%- set passenger_ruby = '/usr/local/rvm/rubies/ruby-2.7.2/bin/ruby'
+ if grains.osfinger in ('CentOS Linux-7', 'Ubuntu-18.04',) else
+ '/usr/bin/ruby' %}
+
### NGINX
nginx:
install_from_phusionpassenger: true
lookup:
- passenger_package: libnginx-mod-http-passenger
- passenger_config_file: /etc/nginx/conf.d/mod-http-passenger.conf
+ passenger_package: {{ passenger_pkg }}
+ ### PASSENGER
+ passenger:
+ passenger_ruby: {{ passenger_ruby }}
### SERVER
server:
config:
- include: 'modules-enabled/*.conf'
+ # This is required to get the passenger module loaded
+ # In Debian it can be done with this
+ # include: 'modules-enabled/*.conf'
+ load_module: {{ passenger_mod }}
+
worker_processes: 4
+ ### SNIPPETS
+ snippets:
+ # Based on https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4
+ ssl_hardening_default.conf:
+ - ssl_session_timeout: 1d
+ - ssl_session_cache: 'shared:arvadosSSL:10m'
+ - ssl_session_tickets: 'off'
+
+ # intermediate configuration
+ - ssl_protocols: TLSv1.2 TLSv1.3
+ - ssl_ciphers: ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
+ - ssl_prefer_server_ciphers: 'off'
+
+ # HSTS (ngx_http_headers_module is required) (63072000 seconds)
+ - add_header: 'Strict-Transport-Security "max-age=63072000" always'
+
+ # OCSP stapling
+ # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+ # - ssl_stapling: 'on'
+ # - ssl_stapling_verify: 'on'
+
+ # verify chain of trust of OCSP response using Root CA and Intermediate certs
+ # - ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates
+
+ # curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam
+ # - ssl_dhparam: /path/to/dhparam
+
+ # replace with the IP address of your resolver
+ # - resolver: 127.0.0.1
+
+ arvados-snakeoil.conf:
+ - ssl_certificate: /etc/ssl/private/arvados-snakeoil-cert.pem
+ - ssl_certificate_key: /etc/ssl/private/arvados-snakeoil-cert.key
+
### SITES
servers:
managed:
# Copyright (C) The Arvados Authors. All rights reserved.
#
-# SPDX-License-Identifier: AGPL-3.0
+# SPDX-License-Identifier: Apache-2.0
{%- set curr_tpldir = tpldir %}
{%- set tpldir = 'arvados' %}
{%- from "arvados/map.jinja" import arvados with context %}
{%- set tpldir = curr_tpldir %}
-{%- set arvados_ca_cert_file = '/etc/ssl/certs/arvados-snakeoil-ca.pem' %}
+include:
+ - nginx.passenger
+ - nginx.config
+ - nginx.service
+
+# Debian uses different dirs for certs and keys, but being a Snake Oil example,
+# we'll keep it simple here.
+{%- set arvados_ca_cert_file = '/etc/ssl/private/arvados-snakeoil-ca.pem' %}
{%- set arvados_ca_key_file = '/etc/ssl/private/arvados-snakeoil-ca.key' %}
-{%- set arvados_cert_file = '/etc/ssl/certs/arvados-snakeoil-cert.pem' %}
+{%- set arvados_cert_file = '/etc/ssl/private/arvados-snakeoil-cert.pem' %}
{%- set arvados_csr_file = '/etc/ssl/private/arvados-snakeoil-cert.csr' %}
{%- set arvados_key_file = '/etc/ssl/private/arvados-snakeoil-cert.key' %}
- ca-certificates
arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run:
- # Taken from https://github.com/arvados/arvados/blob/main/tools/arvbox/lib/arvbox/docker/service/certificate/run
+ # Taken from https://github.com/arvados/arvados/blob/master/tools/arvbox/lib/arvbox/docker/service/certificate/run
cmd.run:
- name: |
    # These dirs are not too CentOS-ish, but this is a helper script
- require:
- pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_dependencies_pkg_installed
- cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_ca_cmd_run
+ # We need this before we can add the nginx's snippet
+ - require_in:
+ - file: nginx_snippet_arvados-snakeoil.conf
{%- if grains.get('os_family') == 'Debian' %}
arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed:
- sls: postgres
arvados_test_salt_states_examples_single_host_snakeoil_certs_certs_permissions_cmd_run:
- cmd.run:
- - name: |
- chown root:ssl-cert {{ arvados_key_file }}
+ file.managed:
+ - name: {{ arvados_key_file }}
+ - owner: root
+ - group: ssl-cert
- require:
- cmd: arvados_test_salt_states_examples_single_host_snakeoil_certs_arvados_snake_oil_cert_cmd_run
- pkg: arvados_test_salt_states_examples_single_host_snakeoil_certs_ssl_cert_pkg_installed
-{%- endif %}
-
-arvados_test_salt_states_examples_single_host_snakeoil_certs_nginx_snakeoil_file_managed:
- file.managed:
- - name: /etc/nginx/snippets/arvados-snakeoil.conf
- - contents: |
- ssl_certificate {{ arvados_cert_file }};
- ssl_certificate_key {{ arvados_key_file }};
- - require:
- - pkg: nginx_install
- require_in:
- - file: nginx_config
- - service: nginx_service
- - watch_in:
- - service: nginx_service
-
-
+ - file: nginx_snippet_arvados-snakeoil.conf
+{%- endif %}
# ARVADOS_TAG="2.2.0"
# POSTGRES_TAG="v0.41.6"
# NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
# LOCALE_TAG="v0.3.4"
# LETSENCRYPT_TAG="v2.1.0"
# ARVADOS_TAG="2.2.0"
# POSTGRES_TAG="v0.41.6"
# NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
# LOCALE_TAG="v0.3.4"
# LETSENCRYPT_TAG="v2.1.0"
# ARVADOS_TAG="2.2.0"
# POSTGRES_TAG="v0.41.6"
# NGINX_TAG="temp-fix-missing-statements-in-pillar"
-# DOCKER_TAG="v1.0.0"
+# DOCKER_TAG="v2.0.7"
# LOCALE_TAG="v0.3.4"
# LETSENCRYPT_TAG="v2.1.0"
-#!/usr/bin/env bash
+#!/bin/bash
# Copyright (C) The Arvados Authors. All rights reserved.
#
# vagrant up
set -o pipefail
+set -x
# capture the directory that the script is running from
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Other formula versions we depend on
POSTGRES_TAG="v0.41.6"
NGINX_TAG="temp-fix-missing-statements-in-pillar"
-DOCKER_TAG="v1.0.0"
+DOCKER_TAG="v2.0.7"
LOCALE_TAG="v0.3.4"
LETSENCRYPT_TAG="v2.1.0"
if [ "${DUMP_CONFIG}" = "yes" ]; then
echo "The provision installer will just dump a config under ${DUMP_SALT_CONFIG_DIR} and exit"
else
- apt-get update
- apt-get install -y curl git jq
+ # Install a few dependency packages
+ # First, let's figure out the OS we're working on
+ OS_ID=$(grep ^ID= /etc/os-release |cut -f 2 -d= |cut -f 2 -d \")
+ echo "Detected distro: ${OS_ID}"
+
+ case ${OS_ID} in
+ "centos")
+ echo "WARNING! Disabling SELinux, see https://dev.arvados.org/issues/18019"
      sed -i 's/SELINUX=enforcing/SELINUX=permissive/' /etc/sysconfig/selinux
+ setenforce permissive
+ yum install -y curl git jq
+ ;;
+ "debian"|"ubuntu")
+ DEBIAN_FRONTEND=noninteractive apt update
+ DEBIAN_FRONTEND=noninteractive apt install -y curl git jq
+ ;;
+ esac
if which salt-call; then
echo "Salt already installed"
# Set salt to masterless mode
cat > /etc/salt/minion << EOFSM
+failhard: "True"
+
file_client: local
file_roots:
base:
# Test that the installation finished correctly
if [ "x${TEST}" = "xyes" ]; then
cd ${T_DIR}
- ./run-test.sh
+ # If we use RVM, we need to run this with it, or most ruby commands will fail
+ RVM_EXEC=""
+ if [ -x /usr/local/rvm/bin/rvm-exec ]; then
+ RVM_EXEC="/usr/local/rvm/bin/rvm-exec"
+ fi
+ ${RVM_EXEC} ./run-test.sh
fi
arv user update --uuid "${user_uuid}" --user '{"is_active": true}'
echo "Getting the user API TOKEN"
-user_api_token=$(arv api_client_authorization list --filters "[[\"owner_uuid\", \"=\", \"${user_uuid}\"],[\"kind\", \"==\", \"arvados#apiClientAuthorization\"]]" --limit=1 |jq -r .items[].api_token)
+user_api_token=$(arv api_client_authorization list | jq -r ".items[] | select( .owner_uuid == \"${user_uuid}\" ).api_token" | head -1)
if [ "x${user_api_token}" = "x" ]; then
+ echo "No existing token found for user '__INITIAL_USER__' (user_uuid: '${user_uuid}'). Creating token"
user_api_token=$(arv api_client_authorization create --api-client-authorization "{\"owner_uuid\": \"${user_uuid}\"}" | jq -r .api_token)
fi
+echo "API TOKEN FOR user '__INITIAL_USER__': '${user_api_token}'."
+
# Change to the user's token and run the workflow
+echo "Switching to user '__INITIAL_USER__'"
export ARVADOS_API_TOKEN="${user_api_token}"
echo "Running test CWL workflow"
--- /dev/null
+#!/usr/bin/env python3
+#
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: CC-BY-SA-3.0
+
+import argparse
+import logging
+import random
+import string
+import sys
+
+import arvados
+import arvados.collection
+
+# Module logger; parse_arguments() lowers it to DEBUG when --debug is given.
+logger = logging.getLogger('arvados.test_collection_create')
+logger.setLevel(logging.INFO)
+
+# Options live on a separate parser so they can be attached below as a parent.
+opts = argparse.ArgumentParser(add_help=False)
+opts.add_argument('--min-files', type=int, default=30000, help="""
+Minimum number of files on each directory. Default: 30000.
+""")
+opts.add_argument('--max-files', type=int, default=30000, help="""
+Maximum number of files on each directory. Default: 30000.
+""")
+opts.add_argument('--min-depth', type=int, default=0, help="""
+Minimum depth for the created tree structure. Default: 0.
+""")
+opts.add_argument('--max-depth', type=int, default=0, help="""
+Maximum depth for the created tree structure. Default: 0.
+""")
+opts.add_argument('--min-subdirs', type=int, default=1, help="""
+Minimum number of subdirectories created at every depth level. Default: 1.
+""")
+opts.add_argument('--max-subdirs', type=int, default=10, help="""
+Maximum number of subdirectories created at every depth level. Default: 10.
+""")
+opts.add_argument('--debug', action='store_true', default=False, help="""
+Sets logging level to DEBUG.
+""")
+
+# Main parser; inherits every option above via parents=[opts].
+arg_parser = argparse.ArgumentParser(
+    description='Create a collection with garbage data for testing purposes.',
+    parents=[opts])
+
+adjectives = ['abandoned','able','absolute','adorable','adventurous','academic',
+ 'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic',
+ 'active','actual','adept','admirable','admired','adolescent','adorable','adored',
+ 'advanced','afraid','affectionate','aged','aggravating','aggressive','agile',
+ 'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated',
+ 'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored',
+ 'ancient','angelic','angry','anguished','animated','annual','another','antique',
+ 'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic',
+ 'ashamed','assured','astonishing','athletic','attached','attentive','attractive',
+ 'austere','authentic','authorized','automatic','avaricious','average','aware','awesome',
+ 'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful',
+ 'belated','beloved','beneficial','better','best','bewitched','big','big-hearted',
+ 'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank',
+ 'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling',
+ 'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable',
+ 'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly',
+ 'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing',
+ 'calculating','calm','candid','canine','capital','carefree','careful','careless',
+ 'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery',
+ 'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever',
+ 'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful',
+ 'colorless','colossal','comfortable','common','compassionate','competent','complete',
+ 'complex','complicated','composed','concerned','concrete','confused','conscious',
+ 'considerate','constant','content','conventional','cooked','cool','cooperative',
+ 'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy',
+ 'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded',
+ 'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy',
+ 'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark',
+ 'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive',
+ 'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed',
+ 'delectable','delicious','delightful','delirious','demanding','dense','dental',
+ 'dependable','dependent','descriptive','deserted','detailed','determined','devoted',
+ 'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct',
+ 'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant',
+ 'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct',
+ 'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic',
+ 'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early',
+ 'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated',
+ 'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished',
+ 'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened',
+ 'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed',
+ 'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted',
+ 'excellent','exemplary','exhausted','excitable','excited','exciting','exotic',
+ 'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small',
+ 'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous',
+ 'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal',
+ 'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female',
+ 'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand',
+ 'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering',
+ 'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy',
+ 'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant',
+ 'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened',
+ 'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen',
+ 'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan',
+ 'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted',
+ 'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering',
+ 'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous',
+ 'graceful','gracious','grand','grandiose','granular','grateful','grave','gray',
+ 'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross',
+ 'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome',
+ 'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy',
+ 'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious',
+ 'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly',
+ 'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious',
+ 'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible',
+ 'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry',
+ 'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic',
+ 'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate',
+ 'illustrious','imaginary','imaginative','immaculate','immaterial','immediate',
+ 'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish',
+ 'impolite','important','impossible','impractical','impressionable','impressive',
+ 'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential',
+ 'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated',
+ 'inferior','infinite','informal','innocent','insecure','insidious','insignificant',
+ 'insistent','instructive','insubstantial','intelligent','intent','intentional',
+ 'interesting','internal','international','intrepid','ironclad','irresponsible',
+ 'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery',
+ 'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo',
+ 'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly',
+ 'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame',
+ 'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean',
+ 'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited',
+ 'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome',
+ 'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely',
+ 'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious',
+ 'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous',
+ 'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical',
+ 'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy',
+ 'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly',
+ 'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental',
+ 'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored',
+ 'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural',
+ 'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible',
+ 'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy',
+ 'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious',
+ 'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd',
+ 'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal',
+ 'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary',
+ 'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding',
+ 'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry',
+ 'parallel','parched','partial','passionate','past','pastel','peaceful','peppery',
+ 'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic',
+ 'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic',
+ 'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political',
+ 'pointed','pointless','poised','poor','popular','portly','posh','positive','possible',
+ 'potable','powerful','powerless','practical','precious','present','prestigious',
+ 'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private',
+ 'prize','probable','productive','profitable','profuse','proper','proud','prudent',
+ 'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling',
+ 'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable',
+ 'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical',
+ 'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular',
+ 'ready','real','realistic','reasonable','red','reflecting','regal','regular',
+ 'reliable','relieved','remarkable','remorseful','remote','repentant','required',
+ 'respectful','responsible','repulsive','revolving','rewarding','rich','rigid',
+ 'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough',
+ 'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty',
+ 'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied',
+ 'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful',
+ 'scratchy','scrawny','second','secondary','second-hand','secret','self-assured',
+ 'self-reliant','selfish','sentimental','separate','serene','serious','serpentine',
+ 'several','severe','shabby','shadowy','shady','shallow','shameful','shameless',
+ 'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term',
+ 'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple',
+ 'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight',
+ 'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug',
+ 'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid',
+ 'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy',
+ 'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy',
+ 'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square',
+ 'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark',
+ 'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange',
+ 'steel','strict','strident','striking','striped','strong','studious','stunning',
+ 'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle',
+ 'suburban','sudden','sugary','sunny','super','superb','superficial','superior',
+ 'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering',
+ 'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty',
+ 'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible',
+ 'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this',
+ 'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy',
+ 'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured',
+ 'tremendous','tragic','trained','tremendous','triangular','tricky','trifling','trim',
+ 'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby',
+ 'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable',
+ 'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit',
+ 'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique',
+ 'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant',
+ 'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy',
+ 'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling',
+ 'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful',
+ 'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid',
+ 'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious',
+ 'victorious','vigilant','vigorous','villainous','violet','violent','virtual',
+ 'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm',
+ 'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy',
+ 'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome',
+ 'well-documented','well-groomed','well-informed','well-lit','well-made','well-off',
+ 'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white',
+ 'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted',
+ 'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful',
+ 'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst',
+ 'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry',
+ 'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous',
+ 'zesty','zigzag']
+nouns = ['people','history','way','art','world','information','map','two','family',
+ 'government','health','system','computer','meat','year','thanks','music','person',
+ 'reading','method','data','food','understanding','theory','law','bird','literature',
+ 'problem','software','control','knowledge','power','ability','economics','love',
+ 'internet','television','science','library','nature','fact','product','idea',
+ 'temperature','investment','area','society','activity','story','industry','media',
+ 'thing','oven','community','definition','safety','quality','development','language',
+ 'management','player','variety','video','week','security','country','exam','movie',
+ 'organization','equipment','physics','analysis','policy','series','thought','basis',
+ 'boyfriend','direction','strategy','technology','army','camera','freedom','paper',
+ 'environment','child','instance','month','truth','marketing','university','writing',
+ 'article','department','difference','goal','news','audience','fishing','growth',
+ 'income','marriage','user','combination','failure','meaning','medicine','philosophy',
+ 'teacher','communication','night','chemistry','disease','disk','energy','nation',
+ 'road','role','soup','advertising','location','success','addition','apartment','education',
+ 'math','moment','painting','politics','attention','decision','event','property',
+ 'shopping','student','wood','competition','distribution','entertainment','office',
+ 'population','president','unit','category','cigarette','context','introduction',
+ 'opportunity','performance','driver','flight','length','magazine','newspaper',
+ 'relationship','teaching','cell','dealer','finding','lake','member','message','phone',
+ 'scene','appearance','association','concept','customer','death','discussion','housing',
+ 'inflation','insurance','mood','woman','advice','blood','effort','expression','importance',
+ 'opinion','payment','reality','responsibility','situation','skill','statement','wealth',
+ 'application','city','county','depth','estate','foundation','grandmother','heart',
+ 'perspective','photo','recipe','studio','topic','collection','depression','imagination',
+ 'passion','percentage','resource','setting','ad','agency','college','connection',
+ 'criticism','debt','description','memory','patience','secretary','solution','administration',
+ 'aspect','attitude','director','personality','psychology','recommendation','response',
+ 'selection','storage','version','alcohol','argument','complaint','contract','emphasis',
+ 'highway','loss','membership','possession','preparation','steak','union','agreement',
+ 'cancer','currency','employment','engineering','entry','interaction','mixture','preference',
+ 'region','republic','tradition','virus','actor','classroom','delivery','device',
+ 'difficulty','drama','election','engine','football','guidance','hotel','owner',
+ 'priority','protection','suggestion','tension','variation','anxiety','atmosphere',
+ 'awareness','bath','bread','candidate','climate','comparison','confusion','construction',
+ 'elevator','emotion','employee','employer','guest','height','leadership','mall','manager',
+ 'operation','recording','sample','transportation','charity','cousin','disaster','editor',
+ 'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome',
+ 'permission','presentation','promotion','reflection','refrigerator','resolution','revenue',
+ 'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee',
+ 'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud',
+ 'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant',
+ 'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife',
+ 'accident','airport','appointment','arrival','assumption','baseball','chapter','committee',
+ 'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall',
+ 'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie',
+ 'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea',
+ 'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate',
+ 'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination',
+ 'garbage','grocery','honey','impression','improvement','independence','insect','inspection',
+ 'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity',
+ 'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair',
+ 'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday',
+ 'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear',
+ 'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady',
+ 'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution',
+ 'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat',
+ 'trainer','uncle','youth','time','work','film','water','money','example','while','business','study',
+ 'game','life','form','air','day','place','number','part','field','fish','back','process','heat',
+ 'hand','experience','job','book','end','point','type','home','economy','value','body','market',
+ 'guide','interest','state','radio','course','company','price','size','card','list','mind','trade',
+ 'line','care','group','risk','word','fat','force','key','light','training','name','school','top',
+ 'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun',
+ 'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture',
+ 'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause',
+ 'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class',
+ 'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance',
+ 'figure','man','model','source','beginning','earth','program','chicken','design','feature','head',
+ 'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun',
+ 'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard',
+ 'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame',
+ 'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view',
+ 'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift',
+ 'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot',
+ 'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square',
+ 'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot',
+ 'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot',
+ 'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence',
+ 'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar',
+ 'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer',
+ 'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign',
+ 'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park',
+ 'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit',
+ 'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart',
+ 'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season',
+ 'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path',
+ 'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast',
+ 'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance',
+ 'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance',
+ 'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew',
+ 'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex',
+ 'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat',
+ 'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole',
+ 'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile',
+ 'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle',
+ 'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency',
+ 'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband',
+ 'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof',
+ 'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation',
+ 'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother',
+ 'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf',
+ 'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent',
+ 'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank',
+ 'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar',
+ 'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage',
+ 'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise',
+ 'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle',
+ 'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer',
+ 'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow',
+ 'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug',
+ 'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer',
+ 'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip',
+ 'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring',
+ 'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you',
+ 'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great',
+ 'being','few','might','still','public','read','keep','start','give','human','local','general','she',
+ 'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big',
+ 'possible','particular','today','major','personal','current','national','cut','natural','physical',
+ 'show','try','check','second','call','move','pay','let','increase','single','individual','turn',
+ 'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook',
+ 'alternative','following','special','working','whole','dance','excuse','cold','commercial','low',
+ 'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present',
+ 'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run',
+ 'complex','due','effective','middle','regular','reserve','independent','leave','original','reach',
+ 'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit',
+ 'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique',
+ 'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar',
+ 'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy',
+ 'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish',
+ 'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow',
+ 'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat',
+ 'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse',
+ 'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark',
+ 'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack',
+ 'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird',
+ 'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing',
+ 'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt',
+ 'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape',
+ 'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt',
+ 'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain',
+ 'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny',
+ 'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect',
+ 'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand',
+ 'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal',
+ 'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry',
+ 'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip',
+ 'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap',
+ 'yesterday']
+
+def get_random_name(with_ext=True):
+ return "{}_{}_{}{}".format(
+ random.choice(adjectives),
+ random.choice(nouns),
+ random.randint(0, 50000),
+ with_ext and '.txt' or '')
+
+def get_random_file(max_filesize):
+ file_start = random.randint(0, (max_filesize - 1025))
+ file_size = random.randint(0, (max_filesize - file_start))
+ file_name = get_random_name()
+ return "{}:{}:{}".format(file_start, file_size, file_name)
+
+def get_stream(name, max_filesize, data_loc, args):
+ files = []
+ for _ in range(random.randint(args.min_files, args.max_files)):
+ files.append(get_random_file(max_filesize))
+ stream = "{} {} {}".format(name, data_loc, ' '.join(files))
+ return stream
+
+def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
+    """Recursively build a list of manifest stream lines rooted at base_stream_name.
+
+    Generates one stream for this directory, then (while depth > 0) recurses
+    into a random number of subdirectories, stopping early once the
+    accumulated text size reaches 128 MiB.
+    """
+    current_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
+    current_size += len(current_stream)
+    streams = [current_stream]
+
+    if current_size >= (128 * 1024 * 1024):
+        logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
+    elif depth == 0:
+        logger.debug("Finished stream {}".format(base_stream_name))
+    else:
+        for _ in range(random.randint(args.min_subdirs, args.max_subdirs)):
+            stream_name = base_stream_name+'/'+get_random_name(False)
+            substreams = create_substreams(depth-1, stream_name, max_filesize,
+                data_loc, args, current_size)
+            # NOTE(review): the recursive call already received current_size, so
+            # summing the children here appears to double-count their sizes;
+            # treated as a conservative heuristic for the 128 MiB cap — confirm.
+            current_size += sum([len(x) for x in substreams])
+            if current_size >= (128 * 1024 * 1024):
+                # Drop this whole subtree once the cap is exceeded.
+                break
+            streams.extend(substreams)
+    return streams
+
+def parse_arguments(arguments):
+ args = arg_parser.parse_args(arguments)
+ if args.debug:
+ logger.setLevel(logging.DEBUG)
+ if args.max_files < args.min_files:
+ arg_parser.error("--min-files={} should be less or equal than max-files={}".format(args.min_files, args.max_files))
+ if args.min_depth < 0:
+ arg_parser.error("--min-depth should be at least 0")
+ if args.max_depth < 0 or args.max_depth < args.min_depth:
+ arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth))
+ if args.max_subdirs < args.min_subdirs:
+ arg_parser.error("--min-subdirs={} should be less or equal than max-subdirs={}".format(args.min_subdirs, args.max_subdirs))
+ return args
+
+def main(arguments=None):
+ args = parse_arguments(arguments)
+ logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs))
+ api = arvados.api('v1', timeout=5*60)
+ max_filesize = 1024*1024
+ data_block = ''.join([random.choice(string.printable) for i in range(max_filesize)])
+ data_loc = arvados.KeepClient(api).put(data_block)
+ streams = create_substreams(random.randint(args.min_depth, args.max_depth),
+ '.', max_filesize, data_loc, args)
+ manifest = ''
+ for s in streams:
+ if len(manifest)+len(s) > (1024*1024*128)-2:
+ logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0]))
+ break
+ manifest += s + '\n'
+ try:
+ coll_name = get_random_name(False)
+ coll = api.collections().create(
+ body={"collection": {
+ "name": coll_name,
+ "manifest_text": manifest
+ },
+ }).execute()
+ except:
+ logger.info("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[0:1024], len(manifest)))
+ raise
+ logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest)))
+ return 0
+
+# Script entry point: exit status is main()'s return value.
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file