Merge branch '18122-select-param'
author     Tom Clegg <tom@curii.com>
           Mon, 20 Sep 2021 14:22:39 +0000 (10:22 -0400)
committer  Tom Clegg <tom@curii.com>
           Mon, 20 Sep 2021 14:22:39 +0000 (10:22 -0400)
closes #18122

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

24 files changed:
lib/config/config.default.yml
lib/config/deprecated.go
lib/config/deprecated_test.go
lib/config/generated_config.go
lib/dispatchcloud/scheduler/run_queue.go
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/context.py
sdk/cwl/setup.py
sdk/cwl/tests/test_container.py
sdk/go/arvados/config.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_collection_test.go
sdk/python/arvados/keep.py
sdk/python/setup.py
sdk/python/tests/test_keep_client.py
services/keep-web/cache.go
services/keep-web/cache_test.go
services/keep-web/handler_test.go
services/keep-web/main.go
services/keep-web/server_test.go
services/keep-web/status_test.go
tools/sync-groups/sync-groups.go
tools/sync-groups/sync-groups_test.go

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 2b474ffdb0c0bdd4eed70a9499985a06c1a22c4d..4bcffd90955aabcac77773c5fafe4445abe5cf37 100644
@@ -553,9 +553,6 @@ Clusters:
         # Approximate memory limit (in bytes) for collection cache.
         MaxCollectionBytes: 100000000
 
-        # Permission cache entries.
-        MaxPermissionEntries: 1000
-
         # UUID cache entries.
         MaxUUIDEntries: 1000
 
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index efc9f0837ea531872d92f551c8030e4f9241def4..e9c5da1064848fe5917cfa882d937302e2b33c29 100644
@@ -449,7 +449,6 @@ type oldKeepWebConfig struct {
                UUIDTTL              *arvados.Duration
                MaxCollectionEntries *int
                MaxCollectionBytes   *int64
-               MaxPermissionEntries *int
                MaxUUIDEntries       *int
        }
 
@@ -505,9 +504,6 @@ func (ldr *Loader) loadOldKeepWebConfig(cfg *arvados.Config) error {
        if oc.Cache.MaxCollectionBytes != nil {
                cluster.Collections.WebDAVCache.MaxCollectionBytes = *oc.Cache.MaxCollectionBytes
        }
-       if oc.Cache.MaxPermissionEntries != nil {
-               cluster.Collections.WebDAVCache.MaxPermissionEntries = *oc.Cache.MaxPermissionEntries
-       }
        if oc.Cache.MaxUUIDEntries != nil {
                cluster.Collections.WebDAVCache.MaxUUIDEntries = *oc.Cache.MaxUUIDEntries
        }
diff --git a/lib/config/deprecated_test.go b/lib/config/deprecated_test.go
index 7cb169c618850a2ca5509dce24d00ee5a7a71f28..595e4c9cad4e849c21d6f3d6b4c11b6b6d4b51ac 100644
@@ -185,7 +185,6 @@ func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
                "UUIDTTL": "1s",
                "MaxCollectionEntries": 42,
                "MaxCollectionBytes": 1234567890,
-               "MaxPermissionEntries": 100,
                "MaxUUIDEntries": 100
        },
        "ManagementToken": "xyzzy"
@@ -201,7 +200,6 @@ func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
        c.Check(cluster.Collections.WebDAVCache.UUIDTTL, check.Equals, arvados.Duration(time.Second))
        c.Check(cluster.Collections.WebDAVCache.MaxCollectionEntries, check.Equals, 42)
        c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, int64(1234567890))
-       c.Check(cluster.Collections.WebDAVCache.MaxPermissionEntries, check.Equals, 100)
        c.Check(cluster.Collections.WebDAVCache.MaxUUIDEntries, check.Equals, 100)
 
        c.Check(cluster.Services.WebDAVDownload.ExternalURL, check.Equals, arvados.URL{Host: "download.example.com", Path: "/"})
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 2d47addff735e40c80a88ec9a56aa1323d827e09..fd07592e982f1ae668028357ed4894d7e985145f 100644
@@ -559,9 +559,6 @@ Clusters:
         # Approximate memory limit (in bytes) for collection cache.
         MaxCollectionBytes: 100000000
 
-        # Permission cache entries.
-        MaxPermissionEntries: 1000
-
         # UUID cache entries.
         MaxUUIDEntries: 1000
 
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
index e9fc5f90215156051fb6de95c123da2c83022700..f729f0dc23a7f927eca6c39fe75734a4a2355ad9 100644
@@ -78,10 +78,12 @@ tryrun:
                                logger.Trace("overquota")
                                overquota = sorted[i:]
                                break tryrun
-                       } else if logger.Info("creating new instance"); sch.pool.Create(it) {
+                       } else if sch.pool.Create(it) {
                                // Success. (Note pool.Create works
                                // asynchronously and does its own
-                               // logging, so we don't need to.)
+                               // logging about the eventual outcome,
+                               // so we don't need to.)
+                               logger.Info("creating new instance")
                        } else {
                                // Failed despite not being at quota,
                                // e.g., cloud ops throttled.  TODO:
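
Note on the hunk above: in Go, the init statement of an `if init; cond` form always runs before the condition is evaluated, so the old `else if logger.Info("creating new instance"); sch.pool.Create(it)` logged the message even when Create went on to return false; the fix logs only on the success branch. A minimal standalone sketch of the pitfall (the names here are illustrative, not taken from the Arvados code):

    package main

    import "fmt"

    func create() bool { return false } // stand-in for a call that can fail

    func main() {
        // The init statement executes before the condition, so this
        // message is printed even though create() returns false.
        if fmt.Println("creating new instance"); create() {
            fmt.Println("success")
        } else {
            fmt.Println("create failed, but the message above was already logged")
        }
    }
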
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 1029ca0e05bff2e0bf501f61fa65f19ea482b2e6..1e79566f4055578ce61c0b37cd9c753429e1da51 100644
@@ -57,6 +57,12 @@ class ArvadosContainer(JobBase):
     def update_pipeline_component(self, r):
         pass
 
+    def _required_env(self):
+        env = {}
+        env["HOME"] = self.outdir
+        env["TMPDIR"] = self.tmpdir
+        return env
+
     def run(self, runtimeContext):
         # ArvadosCommandTool subclasses from cwltool.CommandLineTool,
         # which calls makeJobRunner() to get a new ArvadosContainer
@@ -234,8 +240,6 @@ class ArvadosContainer(JobBase):
                                 "path": "%s/%s" % (self.outdir, self.stdout)}
 
         (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
-        if not docker_req:
-            docker_req = {"dockerImageId": "arvados/jobs:"+__version__}
 
         container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                     docker_req,
diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py
index 13664a8dfb0d57df0477d4c627928b9be17ad8d7..83648f46aa89424652323729b0241e85d2d125e8 100644
@@ -6,6 +6,7 @@ from cwltool.command_line_tool import CommandLineTool, ExpressionTool
 from .arvcontainer import ArvadosContainer
 from .pathmapper import ArvPathMapper
 from .runner import make_builder
+from ._version import __version__
 from functools import partial
 from schema_salad.sourceline import SourceLine
 from cwltool.errors import WorkflowException
@@ -57,6 +58,12 @@ class ArvadosCommandTool(CommandLineTool):
 
     def __init__(self, arvrunner, toolpath_object, loadingContext):
         super(ArvadosCommandTool, self).__init__(toolpath_object, loadingContext)
+
+        (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
+        if not docker_req:
+            self.hints.append({"class": "DockerRequirement",
+                               "dockerImageId": "arvados/jobs:"+__version__})
+
         self.arvrunner = arvrunner
 
     def make_job_runner(self, runtimeContext):
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 77d4027ccbabccf72e3fe5f60ad049726c1b99d1..1e04dd5774ebb8bbc45ebdd417c35138f2d13a4d 100644
@@ -41,3 +41,15 @@ class ArvRuntimeContext(RuntimeContext):
 
         if self.submit_request_uuid:
             self.submit_runner_cluster = self.submit_request_uuid[0:5]
+
+    def get_outdir(self) -> str:
+        """Return self.outdir or create one with self.tmp_outdir_prefix."""
+        return self.outdir
+
+    def get_tmpdir(self) -> str:
+        """Return self.tmpdir or create one with self.tmpdir_prefix."""
+        return self.tmpdir
+
+    def create_tmpdir(self) -> str:
+        """Return self.tmpdir or create one with self.tmpdir_prefix."""
+        return self.tmpdir
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index 34fe0c15dd549f8f1641a8d203b9941979158186..3f1f8a6bed1b3ee5a1e883ecded075d89df62b5a 100644
@@ -39,17 +39,13 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.0.20210319143721',
-          'schema-salad==7.1.20210611090601',
+          'cwltool==3.1.20210816212154',
+          'schema-salad==8.2.20210902094147',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',
           'ciso8601 >= 2.0.0',
           'networkx < 2.6'
       ],
-      extras_require={
-          ':os.name=="posix" and python_version<"3"': ['subprocess32 >= 3.5.1'],
-          ':python_version<"3"': ['pytz'],
-      },
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
       ],
diff --git a/sdk/cwl/tests/test_container.py b/sdk/cwl/tests/test_container.py
index 91283b0b622c5719630c82c19bb3c2c46ac7133f..8a380ff80b3c811ab2c8e050392f679674b2b20b 100644
@@ -123,7 +123,7 @@ class TestContainer(unittest.TestCase):
                 "baseCommand": "ls",
                 "arguments": [{"valueFrom": "$(runtime.outdir)"}],
                 "id": "#",
-                "class": "CommandLineTool"
+                "class": "org.w3id.cwl.cwl.CommandLineTool"
             })
 
             loadingContext, runtimeContext = self.helper(runner, enable_reuse)
@@ -206,7 +206,7 @@ class TestContainer(unittest.TestCase):
             }],
             "baseCommand": "ls",
             "id": "#",
-            "class": "CommandLineTool"
+            "class": "org.w3id.cwl.cwl.CommandLineTool"
         })
 
         loadingContext, runtimeContext = self.helper(runner)
@@ -314,7 +314,7 @@ class TestContainer(unittest.TestCase):
             }],
             "baseCommand": "ls",
             "id": "#",
-            "class": "CommandLineTool"
+            "class": "org.w3id.cwl.cwl.CommandLineTool"
         })
 
         loadingContext, runtimeContext = self.helper(runner)
@@ -414,7 +414,7 @@ class TestContainer(unittest.TestCase):
             "stdin": "/keep/99999999999999999999999999999996+99/file.txt",
             "arguments": [{"valueFrom": "$(runtime.outdir)"}],
             "id": "#",
-            "class": "CommandLineTool"
+            "class": "org.w3id.cwl.cwl.CommandLineTool"
         })
 
         loadingContext, runtimeContext = self.helper(runner)
@@ -639,7 +639,7 @@ class TestContainer(unittest.TestCase):
             "baseCommand": "ls",
             "arguments": [{"valueFrom": "$(runtime.outdir)"}],
             "id": "#",
-            "class": "CommandLineTool"
+            "class": "org.w3id.cwl.cwl.CommandLineTool"
         })
 
         loadingContext, runtimeContext = self.helper(runner)
@@ -720,7 +720,7 @@ class TestContainer(unittest.TestCase):
         document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.1")
 
         tool = cmap({"arguments": ["md5sum", "example.conf"],
-                     "class": "CommandLineTool",
+                     "class": "org.w3id.cwl.cwl.CommandLineTool",
                      "hints": [
                          {
                              "class": "http://commonwl.org/cwltool#Secrets",
@@ -819,7 +819,7 @@ class TestContainer(unittest.TestCase):
             "baseCommand": "ls",
             "arguments": [{"valueFrom": "$(runtime.outdir)"}],
             "id": "#",
-            "class": "CommandLineTool",
+            "class": "org.w3id.cwl.cwl.CommandLineTool",
             "hints": [
                 {
                     "class": "ToolTimeLimit",
@@ -862,7 +862,7 @@ class TestContainer(unittest.TestCase):
             "baseCommand": "ls",
             "arguments": [{"valueFrom": "$(runtime.outdir)"}],
             "id": "#",
-            "class": "CommandLineTool",
+            "class": "org.w3id.cwl.cwl.CommandLineTool",
             "hints": [
                 {
                     "class": "http://arvados.org/cwl#OutputStorageClass",
@@ -936,7 +936,7 @@ class TestContainer(unittest.TestCase):
             "baseCommand": "ls",
             "arguments": [{"valueFrom": "$(runtime.outdir)"}],
             "id": "#",
-            "class": "CommandLineTool",
+            "class": "org.w3id.cwl.cwl.CommandLineTool",
             "hints": [
             {
                 "class": "http://arvados.org/cwl#ProcessProperties",
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 4a7c18b3e06324ab0f37a2a8db54270aedcdc2c9..ad9a4da03cc5f3a7144fadb66d8430e133332866 100644
@@ -63,7 +63,6 @@ type WebDAVCacheConfig struct {
        MaxBlockEntries      int
        MaxCollectionEntries int
        MaxCollectionBytes   int64
-       MaxPermissionEntries int
        MaxUUIDEntries       int
        MaxSessions          int
 }
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 4d9db421fc3838b268fdeaeea1b81b9ca1192843..2b5df76ad6a12d7e8e557efad006f3aa25f128d5 100644
@@ -5,6 +5,7 @@
 package arvados
 
 import (
+       "bytes"
        "context"
        "encoding/json"
        "fmt"
@@ -1040,38 +1041,64 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
 }
 
 func (dn *dirnode) loadManifest(txt string) error {
-       var dirname string
-       streams := strings.Split(txt, "\n")
-       if streams[len(streams)-1] != "" {
+       streams := bytes.Split([]byte(txt), []byte{'\n'})
+       if len(streams[len(streams)-1]) != 0 {
                return fmt.Errorf("line %d: no trailing newline", len(streams))
        }
        streams = streams[:len(streams)-1]
        segments := []storedSegment{}
+       // To reduce allocs, we reuse a single "pathparts" slice
+       // (pre-split on "/" separators) for the duration of this
+       // func.
+       var pathparts []string
+       // To reduce allocs, we reuse a single "toks" slice of 3 byte
+       // slices.
+       var toks = make([][]byte, 3)
+       // Similar to bytes.SplitN(token, []byte{c}, 3), but splits
+       // into the toks slice rather than allocating a new one, and
+       // returns the number of toks (1, 2, or 3).
+       splitToToks := func(src []byte, c rune) int {
+               c1 := bytes.IndexRune(src, c)
+               if c1 < 0 {
+                       toks[0] = src
+                       return 1
+               }
+               toks[0], src = src[:c1], src[c1+1:]
+               c2 := bytes.IndexRune(src, c)
+               if c2 < 0 {
+                       toks[1] = src
+                       return 2
+               }
+               toks[1], toks[2] = src[:c2], src[c2+1:]
+               return 3
+       }
        for i, stream := range streams {
                lineno := i + 1
                var anyFileTokens bool
                var pos int64
                var segIdx int
                segments = segments[:0]
-               for i, token := range strings.Split(stream, " ") {
+               pathparts = nil
+               streamparts := 0
+               for i, token := range bytes.Split(stream, []byte{' '}) {
                        if i == 0 {
-                               dirname = manifestUnescape(token)
+                               pathparts = strings.Split(manifestUnescape(string(token)), "/")
+                               streamparts = len(pathparts)
                                continue
                        }
-                       if !strings.Contains(token, ":") {
+                       if !bytes.ContainsRune(token, ':') {
                                if anyFileTokens {
                                        return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                                }
-                               toks := strings.SplitN(token, "+", 3)
-                               if len(toks) < 2 {
+                               if splitToToks(token, '+') < 2 {
                                        return fmt.Errorf("line %d: bad locator %q", lineno, token)
                                }
-                               length, err := strconv.ParseInt(toks[1], 10, 32)
+                               length, err := strconv.ParseInt(string(toks[1]), 10, 32)
                                if err != nil || length < 0 {
                                        return fmt.Errorf("line %d: bad locator %q", lineno, token)
                                }
                                segments = append(segments, storedSegment{
-                                       locator: token,
+                                       locator: string(token),
                                        size:    int(length),
                                        offset:  0,
                                        length:  int(length),
@@ -1080,23 +1107,26 @@ func (dn *dirnode) loadManifest(txt string) error {
                        } else if len(segments) == 0 {
                                return fmt.Errorf("line %d: bad locator %q", lineno, token)
                        }
-
-                       toks := strings.SplitN(token, ":", 3)
-                       if len(toks) != 3 {
+                       if splitToToks(token, ':') != 3 {
                                return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
                        anyFileTokens = true
 
-                       offset, err := strconv.ParseInt(toks[0], 10, 64)
+                       offset, err := strconv.ParseInt(string(toks[0]), 10, 64)
                        if err != nil || offset < 0 {
                                return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
-                       length, err := strconv.ParseInt(toks[1], 10, 64)
+                       length, err := strconv.ParseInt(string(toks[1]), 10, 64)
                        if err != nil || length < 0 {
                                return fmt.Errorf("line %d: bad file segment %q", lineno, token)
                        }
-                       name := dirname + "/" + manifestUnescape(toks[2])
-                       fnode, err := dn.createFileAndParents(name)
+                       if !bytes.ContainsAny(toks[2], `\/`) {
+                               // optimization for a common case
+                               pathparts = append(pathparts[:streamparts], string(toks[2]))
+                       } else {
+                               pathparts = append(pathparts[:streamparts], strings.Split(manifestUnescape(string(toks[2])), "/")...)
+                       }
+                       fnode, err := dn.createFileAndParents(pathparts)
                        if fnode == nil && err == nil && length == 0 {
                                // Special case: an empty file used as
                                // a marker to preserve an otherwise
@@ -1104,7 +1134,7 @@ func (dn *dirnode) loadManifest(txt string) error {
                                continue
                        }
                        if err != nil || (fnode == nil && length != 0) {
-                               return fmt.Errorf("line %d: cannot use path %q with length %d: %s", lineno, name, length, err)
+                               return fmt.Errorf("line %d: cannot use name %q with length %d: %s", lineno, toks[2], length, err)
                        }
                        // Map the stream offset/range coordinates to
                        // block/offset/range coordinates and add
@@ -1155,7 +1185,7 @@ func (dn *dirnode) loadManifest(txt string) error {
                        return fmt.Errorf("line %d: no file segments", lineno)
                } else if len(segments) == 0 {
                        return fmt.Errorf("line %d: no locators", lineno)
-               } else if dirname == "" {
+               } else if streamparts == 0 {
                        return fmt.Errorf("line %d: no stream name", lineno)
                }
        }
@@ -1166,9 +1196,11 @@ func (dn *dirnode) loadManifest(txt string) error {
 //
 // If path is a "parent directory exists" marker (the last path
 // component is "."), the returned values are both nil.
-func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
+//
+// Newly added nodes have modtime==0. Caller is responsible for fixing
+// them with backdateTree.
+func (dn *dirnode) createFileAndParents(names []string) (fn *filenode, err error) {
        var node inode = dn
-       names := strings.Split(path, "/")
        basename := names[len(names)-1]
        for _, name := range names[:len(names)-1] {
                switch name {
@@ -1182,12 +1214,12 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
                        node = node.Parent()
                        continue
                }
-               modtime := node.Parent().FileInfo().ModTime()
                node.Lock()
-               locked := node
+               unlock := node.Unlock
                node, err = node.Child(name, func(child inode) (inode, error) {
                        if child == nil {
-                               child, err := node.FS().newNode(name, 0755|os.ModeDir, modtime)
+                               // note modtime will be fixed later in backdateTree()
+                               child, err := node.FS().newNode(name, 0755|os.ModeDir, time.Time{})
                                if err != nil {
                                        return nil, err
                                }
@@ -1199,7 +1231,7 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
                                return child, nil
                        }
                })
-               locked.Unlock()
+               unlock()
                if err != nil {
                        return
                }
@@ -1207,16 +1239,15 @@ func (dn *dirnode) createFileAndParents(path string) (fn *filenode, err error) {
        if basename == "." {
                return
        } else if !permittedName(basename) {
-               err = fmt.Errorf("invalid file part %q in path %q", basename, path)
+               err = fmt.Errorf("invalid file part %q in path %q", basename, names)
                return
        }
-       modtime := node.FileInfo().ModTime()
        node.Lock()
        defer node.Unlock()
        _, err = node.Child(basename, func(child inode) (inode, error) {
                switch child := child.(type) {
                case nil:
-                       child, err = node.FS().newNode(basename, 0755, modtime)
+                       child, err = node.FS().newNode(basename, 0755, time.Time{})
                        if err != nil {
                                return nil, err
                        }
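
For readers skimming the loadManifest rewrite above: the point of splitToToks is to split a manifest token on '+' or ':' into a reused [][]byte instead of allocating a fresh slice per token the way strings.SplitN or bytes.SplitN would. A self-contained sketch of the same idea, written as a free function over a byte separator (the real code closes over the toks slice and takes a rune):

    package main

    import (
        "bytes"
        "fmt"
    )

    // splitToToks fills a caller-provided 3-element slice with up to three
    // fields of src split on c, and returns how many fields were filled.
    // The semantics match bytes.SplitN(src, []byte{c}, 3) without the
    // per-call slice allocation.
    func splitToToks(toks [][]byte, src []byte, c byte) int {
        c1 := bytes.IndexByte(src, c)
        if c1 < 0 {
            toks[0] = src
            return 1
        }
        toks[0], src = src[:c1], src[c1+1:]
        c2 := bytes.IndexByte(src, c)
        if c2 < 0 {
            toks[1] = src
            return 2
        }
        toks[1], toks[2] = src[:c2], src[c2+1:]
        return 3
    }

    func main() {
        toks := make([][]byte, 3)
        n := splitToToks(toks, []byte("0:3:foo/bar.txt"), ':')
        fmt.Println(n, string(toks[0]), string(toks[1]), string(toks[2])) // 3 0 3 foo/bar.txt
    }
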
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index c032b07166fa6abd985f6c902c07c9e4c6e37f25..beb4d61fcf72ef7696952b3bf37179334ff3abd7 100644
@@ -1433,6 +1433,31 @@ func (s *CollectionFSSuite) TestEdgeCaseManifests(c *check.C) {
        }
 }
 
+var bigmanifest = func() string {
+       var buf bytes.Buffer
+       for i := 0; i < 2000; i++ {
+               fmt.Fprintf(&buf, "./dir%d", i)
+               for i := 0; i < 100; i++ {
+                       fmt.Fprintf(&buf, " d41d8cd98f00b204e9800998ecf8427e+99999")
+               }
+               for i := 0; i < 2000; i++ {
+                       fmt.Fprintf(&buf, " 1200000:300000:file%d", i)
+               }
+               fmt.Fprintf(&buf, "\n")
+       }
+       return buf.String()
+}()
+
+func (s *CollectionFSSuite) BenchmarkParseManifest(c *check.C) {
+       DebugLocksPanicMode = false
+       c.Logf("test manifest is %d bytes", len(bigmanifest))
+       for i := 0; i < c.N; i++ {
+               fs, err := (&Collection{ManifestText: bigmanifest}).FileSystem(s.client, s.kc)
+               c.Check(err, check.IsNil)
+               c.Check(fs, check.NotNil)
+       }
+}
+
 func (s *CollectionFSSuite) checkMemSize(c *check.C, f File) {
        fn := f.(*filehandle).inode.(*filenode)
        var memsize int64
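
To reproduce the parse benchmark added above, an invocation along these lines should work from the top of the source tree (gocheck benchmarks are driven through go test flags; the exact flags and package path are assumptions, not part of this commit):

    go test ./sdk/go/arvados -check.f BenchmarkParseManifest -check.b
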
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 9dfe0436dec9bdf22eb71ad9bfe2e8a201ee3ab6..bc07851835e2471ee9f1055b689fe6a789ea4d62 100644
@@ -720,11 +720,11 @@ class KeepClient(object):
             result = service.last_result()
 
             if not success:
-                if result.get('status_code', None):
+                if result.get('status_code'):
                     _logger.debug("Request fail: PUT %s => %s %s",
                                   self.data_hash,
-                                  result['status_code'],
-                                  result['body'])
+                                  result.get('status_code'),
+                                  result.get('body'))
                 raise self.TaskFailed()
 
             _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index ef95674a6477b9ba1d57c91b2544022232bc206c..311a139906aa4cb9d0a3d28b178767324a49837f 100644
@@ -48,17 +48,13 @@ setup(name='arvados-python-client',
       install_requires=[
           'ciso8601 >=2.0.0',
           'future',
-          'google-api-python-client >=1.6.2, <1.7',
+          'google-api-python-client >=1.6.2, <2',
           'httplib2 >=0.9.2',
           'pycurl >=7.19.5.1',
-          'ruamel.yaml >=0.15.54, <=0.16.5',
+          'ruamel.yaml >=0.15.54, <=0.17.11',
           'setuptools',
           'ws4py >=0.4.2',
       ],
-      extras_require={
-          ':os.name=="posix" and python_version<"3"': ['subprocess32 >= 3.5.1'],
-          ':python_version<"3"': ['pytz'],
-      },
       classifiers=[
           'Programming Language :: Python :: 3',
       ],
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index b1c42fd2b3a1475934a0c6090e12139750210f46..b2160e549b538655eb5863907d87fb1560ce3ba5 100644
@@ -1300,6 +1300,8 @@ class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
         def last_result(self):
             if self.will_succeed:
                 return self._result
+            else:
+                return {"status_code": 500, "body": "didn't succeed"}
 
         def finished(self):
             return False
diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index a52af804841fb58f4b837893ae83e3cb76d960b4..d2c79326af1ec45a03cb852f3e69b96c61b590d3 100644
@@ -14,6 +14,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/keepclient"
        lru "github.com/hashicorp/golang-lru"
        "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
 const metricsUpdateInterval = time.Second / 10
@@ -21,13 +22,16 @@ const metricsUpdateInterval = time.Second / 10
 type cache struct {
        cluster     *arvados.Cluster
        config      *arvados.WebDAVCacheConfig // TODO: use cluster.Collections.WebDAV instead
+       logger      logrus.FieldLogger
        registry    *prometheus.Registry
        metrics     cacheMetrics
        pdhs        *lru.TwoQueueCache
        collections *lru.TwoQueueCache
-       permissions *lru.TwoQueueCache
        sessions    *lru.TwoQueueCache
        setupOnce   sync.Once
+
+       chPruneSessions    chan struct{}
+       chPruneCollections chan struct{}
 }
 
 type cacheMetrics struct {
@@ -37,7 +41,6 @@ type cacheMetrics struct {
        sessionEntries    prometheus.Gauge
        collectionHits    prometheus.Counter
        pdhHits           prometheus.Counter
-       permissionHits    prometheus.Counter
        sessionHits       prometheus.Counter
        sessionMisses     prometheus.Counter
        apiCalls          prometheus.Counter
@@ -65,13 +68,6 @@ func (m *cacheMetrics) setup(reg *prometheus.Registry) {
                Help:      "Number of uuid-to-pdh cache hits.",
        })
        reg.MustRegister(m.pdhHits)
-       m.permissionHits = prometheus.NewCounter(prometheus.CounterOpts{
-               Namespace: "arvados",
-               Subsystem: "keepweb_collectioncache",
-               Name:      "permission_hits",
-               Help:      "Number of targetID-to-permission cache hits.",
-       })
-       reg.MustRegister(m.permissionHits)
        m.apiCalls = prometheus.NewCounter(prometheus.CounterOpts{
                Namespace: "arvados",
                Subsystem: "keepweb_collectioncache",
@@ -117,8 +113,9 @@ func (m *cacheMetrics) setup(reg *prometheus.Registry) {
 }
 
 type cachedPDH struct {
-       expire time.Time
-       pdh    string
+       expire  time.Time
+       refresh time.Time
+       pdh     string
 }
 
 type cachedCollection struct {
@@ -149,10 +146,6 @@ func (c *cache) setup() {
        if err != nil {
                panic(err)
        }
-       c.permissions, err = lru.New2Q(c.config.MaxPermissionEntries)
-       if err != nil {
-               panic(err)
-       }
        c.sessions, err = lru.New2Q(c.config.MaxSessions)
        if err != nil {
                panic(err)
@@ -168,6 +161,18 @@ func (c *cache) setup() {
                        c.updateGauges()
                }
        }()
+       c.chPruneCollections = make(chan struct{}, 1)
+       go func() {
+               for range c.chPruneCollections {
+                       c.pruneCollections()
+               }
+       }()
+       c.chPruneSessions = make(chan struct{}, 1)
+       go func() {
+               for range c.chPruneSessions {
+                       c.pruneSessions()
+               }
+       }()
 }
 
 func (c *cache) updateGauges() {
@@ -192,19 +197,25 @@ func (c *cache) Update(client *arvados.Client, coll arvados.Collection, fs arvad
        }
        coll.ManifestText = m
        var updated arvados.Collection
-       defer c.pdhs.Remove(coll.UUID)
        err = client.RequestAndDecode(&updated, "PATCH", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{
                "collection": map[string]string{
                        "manifest_text": coll.ManifestText,
                },
        })
-       if err == nil {
-               c.collections.Add(client.AuthToken+"\000"+updated.PortableDataHash, &cachedCollection{
-                       expire:     time.Now().Add(time.Duration(c.config.TTL)),
-                       collection: &updated,
-               })
+       if err != nil {
+               c.pdhs.Remove(coll.UUID)
+               return err
        }
-       return err
+       c.collections.Add(client.AuthToken+"\000"+updated.PortableDataHash, &cachedCollection{
+               expire:     time.Now().Add(time.Duration(c.config.TTL)),
+               collection: &updated,
+       })
+       c.pdhs.Add(coll.UUID, &cachedPDH{
+               expire:  time.Now().Add(time.Duration(c.config.TTL)),
+               refresh: time.Now().Add(time.Duration(c.config.UUIDTTL)),
+               pdh:     updated.PortableDataHash,
+       })
+       return nil
 }
 
 // ResetSession unloads any potentially stale state. Should be called
@@ -246,7 +257,10 @@ func (c *cache) GetSession(token string) (arvados.CustomFileSystem, *cachedSessi
        } else {
                c.metrics.sessionHits.Inc()
        }
-       go c.pruneSessions()
+       select {
+       case c.chPruneSessions <- struct{}{}:
+       default:
+       }
        fs, _ := sess.fs.Load().(arvados.CustomFileSystem)
        if fs != nil && !expired {
                return fs, sess, nil
@@ -302,19 +316,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
        c.setupOnce.Do(c.setup)
        c.metrics.requests.Inc()
 
-       permOK := false
-       permKey := arv.ApiToken + "\000" + targetID
-       if forceReload {
-       } else if ent, cached := c.permissions.Get(permKey); cached {
-               ent := ent.(*cachedPermission)
-               if ent.expire.Before(time.Now()) {
-                       c.permissions.Remove(permKey)
-               } else {
-                       permOK = true
-                       c.metrics.permissionHits.Inc()
-               }
-       }
-
+       var pdhRefresh bool
        var pdh string
        if arvadosclient.PDHMatch(targetID) {
                pdh = targetID
@@ -324,22 +326,29 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
                        c.pdhs.Remove(targetID)
                } else {
                        pdh = ent.pdh
+                       pdhRefresh = forceReload || time.Now().After(ent.refresh)
                        c.metrics.pdhHits.Inc()
                }
        }
 
-       var collection *arvados.Collection
-       if pdh != "" {
-               collection = c.lookupCollection(arv.ApiToken + "\000" + pdh)
-       }
-
-       if collection != nil && permOK {
-               return collection, nil
-       } else if collection != nil {
-               // Ask API for current PDH for this targetID. Most
-               // likely, the cached PDH is still correct; if so,
-               // _and_ the current token has permission, we can
-               // use our cached manifest.
+       if pdh == "" {
+               // UUID->PDH mapping is not cached, might as well get
+               // the whole collection record and be done (below).
+               c.logger.Debugf("cache(%s): have no pdh", targetID)
+       } else if cached := c.lookupCollection(arv.ApiToken + "\000" + pdh); cached == nil {
+               // PDH->manifest is not cached, might as well get the
+               // whole collection record (below).
+               c.logger.Debugf("cache(%s): have pdh %s but manifest is not cached", targetID, pdh)
+       } else if !pdhRefresh {
+               // We looked up UUID->PDH very recently, and we still
+               // have the manifest for that PDH.
+               c.logger.Debugf("cache(%s): have pdh %s and refresh not needed", targetID, pdh)
+               return cached, nil
+       } else {
+               // Get current PDH for this UUID (and confirm we still
+               // have read permission).  Most likely, the cached PDH
+               // is still correct, in which case we can use our
+               // cached manifest.
                c.metrics.apiCalls.Inc()
                var current arvados.Collection
                err := arv.Get("collections", targetID, selectPDH, &current)
@@ -347,47 +356,47 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
                        return nil, err
                }
                if current.PortableDataHash == pdh {
-                       c.permissions.Add(permKey, &cachedPermission{
-                               expire: time.Now().Add(time.Duration(c.config.TTL)),
-                       })
-                       if pdh != targetID {
-                               c.pdhs.Add(targetID, &cachedPDH{
-                                       expire: time.Now().Add(time.Duration(c.config.UUIDTTL)),
-                                       pdh:    pdh,
-                               })
-                       }
-                       return collection, err
+                       // PDH has not changed, cached manifest is
+                       // correct.
+                       c.logger.Debugf("cache(%s): verified cached pdh %s is still correct", targetID, pdh)
+                       return cached, nil
                }
-               // PDH changed, but now we know we have
-               // permission -- and maybe we already have the
-               // new PDH in the cache.
-               if coll := c.lookupCollection(arv.ApiToken + "\000" + current.PortableDataHash); coll != nil {
-                       return coll, nil
+               if cached := c.lookupCollection(arv.ApiToken + "\000" + current.PortableDataHash); cached != nil {
+                       // PDH changed, and we already have the
+                       // manifest for that new PDH.
+                       c.logger.Debugf("cache(%s): cached pdh %s was stale, new pdh is %s and manifest is already in cache", targetID, pdh, current.PortableDataHash)
+                       return cached, nil
                }
        }
 
-       // Collection manifest is not cached.
+       // Either UUID->PDH is not cached, or PDH->manifest is not
+       // cached.
+       var retrieved arvados.Collection
        c.metrics.apiCalls.Inc()
-       err := arv.Get("collections", targetID, nil, &collection)
+       err := arv.Get("collections", targetID, nil, &retrieved)
        if err != nil {
                return nil, err
        }
+       c.logger.Debugf("cache(%s): retrieved manifest, caching with pdh %s", targetID, retrieved.PortableDataHash)
        exp := time.Now().Add(time.Duration(c.config.TTL))
-       c.permissions.Add(permKey, &cachedPermission{
-               expire: exp,
-       })
-       c.pdhs.Add(targetID, &cachedPDH{
-               expire: time.Now().Add(time.Duration(c.config.UUIDTTL)),
-               pdh:    collection.PortableDataHash,
-       })
-       c.collections.Add(arv.ApiToken+"\000"+collection.PortableDataHash, &cachedCollection{
+       if targetID != retrieved.PortableDataHash {
+               c.pdhs.Add(targetID, &cachedPDH{
+                       expire:  exp,
+                       refresh: time.Now().Add(time.Duration(c.config.UUIDTTL)),
+                       pdh:     retrieved.PortableDataHash,
+               })
+       }
+       c.collections.Add(arv.ApiToken+"\000"+retrieved.PortableDataHash, &cachedCollection{
                expire:     exp,
-               collection: collection,
+               collection: &retrieved,
        })
-       if int64(len(collection.ManifestText)) > c.config.MaxCollectionBytes/int64(c.config.MaxCollectionEntries) {
-               go c.pruneCollections()
+       if int64(len(retrieved.ManifestText)) > c.config.MaxCollectionBytes/int64(c.config.MaxCollectionEntries) {
+               select {
+               case c.chPruneCollections <- struct{}{}:
+               default:
+               }
        }
-       return collection, nil
+       return &retrieved, nil
 }
 
 // pruneCollections checks the total bytes occupied by manifest_text
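
The chPruneSessions/chPruneCollections change above replaces a goroutine-per-request `go c.pruneSessions()` with one long-lived pruner fed by a 1-buffered signal channel; the non-blocking send coalesces a burst of requests into at most one pending prune. A minimal sketch of that pattern in isolation (names are illustrative):

    package main

    import (
        "fmt"
        "sync/atomic"
        "time"
    )

    func main() {
        var prunes int64
        chPrune := make(chan struct{}, 1)
        // Single worker: drains the signal channel and prunes.
        go func() {
            for range chPrune {
                atomic.AddInt64(&prunes, 1)
                time.Sleep(10 * time.Millisecond) // simulate pruning work
            }
        }()
        // Many callers ask for a prune; extra requests are dropped while
        // one is already pending, instead of spawning 1000 goroutines.
        for i := 0; i < 1000; i++ {
            select {
            case chPrune <- struct{}{}:
            default:
            }
        }
        time.Sleep(100 * time.Millisecond)
        fmt.Println("prune runs:", atomic.LoadInt64(&prunes), "(far fewer than 1000)")
    }
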
diff --git a/services/keep-web/cache_test.go b/services/keep-web/cache_test.go
index 0a0eef6e44ee36d7d8f302e38d47133285efab6d..3e2faaff726ed52e45991fc9f50d3b986672e001 100644
@@ -10,6 +10,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/common/expfmt"
        "gopkg.in/check.v1"
@@ -33,7 +34,7 @@ func (s *UnitSuite) TestCache(c *check.C) {
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, check.Equals, nil)
 
-       cache := newConfig(s.Config).Cache
+       cache := newConfig(ctxlog.TestLogger(c), s.Config).Cache
        cache.registry = prometheus.NewRegistry()
 
        // Hit the same collection 5 times using the same token. Only
@@ -51,7 +52,6 @@ func (s *UnitSuite) TestCache(c *check.C) {
        s.checkCacheMetrics(c, cache.registry,
                "requests 5",
                "hits 4",
-               "permission_hits 4",
                "pdh_hits 4",
                "api_calls 1")
 
@@ -72,7 +72,6 @@ func (s *UnitSuite) TestCache(c *check.C) {
        s.checkCacheMetrics(c, cache.registry,
                "requests 6",
                "hits 4",
-               "permission_hits 4",
                "pdh_hits 4",
                "api_calls 2")
 
@@ -85,7 +84,6 @@ func (s *UnitSuite) TestCache(c *check.C) {
        s.checkCacheMetrics(c, cache.registry,
                "requests 7",
                "hits 5",
-               "permission_hits 5",
                "pdh_hits 4",
                "api_calls 2")
 
@@ -105,7 +103,6 @@ func (s *UnitSuite) TestCache(c *check.C) {
        s.checkCacheMetrics(c, cache.registry,
                "requests 27",
                "hits 23",
-               "permission_hits 23",
                "pdh_hits 22",
                "api_calls 4")
 }
@@ -114,7 +111,7 @@ func (s *UnitSuite) TestCacheForceReloadByPDH(c *check.C) {
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, check.Equals, nil)
 
-       cache := newConfig(s.Config).Cache
+       cache := newConfig(ctxlog.TestLogger(c), s.Config).Cache
        cache.registry = prometheus.NewRegistry()
 
        for _, forceReload := range []bool{false, true, false, true} {
@@ -125,16 +122,15 @@ func (s *UnitSuite) TestCacheForceReloadByPDH(c *check.C) {
        s.checkCacheMetrics(c, cache.registry,
                "requests 4",
                "hits 3",
-               "permission_hits 1",
                "pdh_hits 0",
-               "api_calls 3")
+               "api_calls 1")
 }
 
 func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) {
        arv, err := arvadosclient.MakeArvadosClient()
        c.Assert(err, check.Equals, nil)
 
-       cache := newConfig(s.Config).Cache
+       cache := newConfig(ctxlog.TestLogger(c), s.Config).Cache
        cache.registry = prometheus.NewRegistry()
 
        for _, forceReload := range []bool{false, true, false, true} {
@@ -145,7 +141,6 @@ func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) {
        s.checkCacheMetrics(c, cache.registry,
                "requests 4",
                "hits 3",
-               "permission_hits 1",
                "pdh_hits 3",
                "api_calls 3")
 }
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index e883e806ccf509fc87a73f592300619e61901fd3..55c122b0ff7123ba4eb1c670900a5d3c6b5c4dba 100644
@@ -50,7 +50,7 @@ func (s *UnitSuite) SetUpTest(c *check.C) {
 }
 
 func (s *UnitSuite) TestCORSPreflight(c *check.C) {
-       h := handler{Config: newConfig(s.Config)}
+       h := handler{Config: newConfig(ctxlog.TestLogger(c), s.Config)}
        u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
        req := &http.Request{
                Method:     "OPTIONS",
@@ -109,7 +109,7 @@ func (s *UnitSuite) TestEmptyResponse(c *check.C) {
                        c.Assert(err, check.IsNil)
                }
 
-               h := handler{Config: newConfig(s.Config)}
+               h := handler{Config: newConfig(ctxlog.TestLogger(c), s.Config)}
                u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
                req := &http.Request{
                        Method:     "GET",
@@ -159,7 +159,7 @@ func (s *UnitSuite) TestInvalidUUID(c *check.C) {
                        RequestURI: u.RequestURI(),
                }
                resp := httptest.NewRecorder()
-               cfg := newConfig(s.Config)
+               cfg := newConfig(ctxlog.TestLogger(c), s.Config)
                cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
                h := handler{Config: cfg}
                h.ServeHTTP(resp, req)
@@ -1067,7 +1067,7 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) {
                contentType string
        }{
                {"picture.txt", "BMX bikes are small this year\n", "text/plain; charset=utf-8"},
-               {"picture.bmp", "BMX bikes are small this year\n", "image/x-ms-bmp"},
+               {"picture.bmp", "BMX bikes are small this year\n", "image/(x-ms-)?bmp"},
                {"picture.jpg", "BMX bikes are small this year\n", "image/jpeg"},
                {"picture1", "BMX bikes are small this year\n", "image/bmp"},            // content sniff; "BM" is the magic signature for .bmp
                {"picture2", "Cars are small this year\n", "text/plain; charset=utf-8"}, // content sniff
@@ -1103,7 +1103,7 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) {
                resp := httptest.NewRecorder()
                s.testServer.Handler.ServeHTTP(resp, req)
                c.Check(resp.Code, check.Equals, http.StatusOK)
-               c.Check(resp.Header().Get("Content-Type"), check.Equals, trial.contentType)
+               c.Check(resp.Header().Get("Content-Type"), check.Matches, trial.contentType)
                c.Check(resp.Body.String(), check.Equals, trial.content)
        }
 }
@@ -1241,7 +1241,7 @@ func (s *IntegrationSuite) checkUploadDownloadRequest(c *check.C, h *handler, re
 }
 
 func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) {
-       config := newConfig(s.ArvConfig)
+       config := newConfig(ctxlog.TestLogger(c), s.ArvConfig)
        h := handler{Config: config}
        u := mustParseURL("http://" + arvadostest.FooCollection + ".keep-web.example/foo")
 
@@ -1314,7 +1314,7 @@ func (s *IntegrationSuite) TestDownloadLoggingPermission(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestUploadLoggingPermission(c *check.C) {
-       config := newConfig(s.ArvConfig)
+       config := newConfig(ctxlog.TestLogger(c), s.ArvConfig)
        h := handler{Config: config}
 
        for _, adminperm := range []bool{true, false} {
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index a62e0abb667ca5211994211e9f401c271e2e5b5b..a9ac834a20cedf21f80fcc5e9a7742f86bb0e812 100644
@@ -29,7 +29,7 @@ type Config struct {
        cluster *arvados.Cluster
 }
 
-func newConfig(arvCfg *arvados.Config) *Config {
+func newConfig(logger logrus.FieldLogger, arvCfg *arvados.Config) *Config {
        cfg := Config{}
        var cls *arvados.Cluster
        var err error
@@ -39,6 +39,7 @@ func newConfig(arvCfg *arvados.Config) *Config {
        cfg.cluster = cls
        cfg.Cache.config = &cfg.cluster.Collections.WebDAVCache
        cfg.Cache.cluster = cls
+       cfg.Cache.logger = logger
        return &cfg
 }
 
@@ -81,7 +82,7 @@ func configure(logger log.FieldLogger, args []string) *Config {
        if err != nil {
                log.Fatal(err)
        }
-       cfg := newConfig(arvCfg)
+       cfg := newConfig(logger, arvCfg)
 
        if *dumpConfig {
                out, err := yaml.Marshal(cfg)
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 21d767208740dedd148051714410747e0f0a8da2..54e9ddf3704d606a14c2b8965c1bf611cacb76bd 100644
@@ -393,7 +393,6 @@ func (s *IntegrationSuite) TestMetrics(c *check.C) {
        c.Check(counters["arvados_keepweb_collectioncache_api_calls//"].Value, check.Equals, int64(2))
        c.Check(counters["arvados_keepweb_collectioncache_hits//"].Value, check.Equals, int64(1))
        c.Check(counters["arvados_keepweb_collectioncache_pdh_hits//"].Value, check.Equals, int64(1))
-       c.Check(counters["arvados_keepweb_collectioncache_permission_hits//"].Value, check.Equals, int64(1))
        c.Check(gauges["arvados_keepweb_collectioncache_cached_manifests//"].Value, check.Equals, float64(1))
        // FooCollection's cached manifest size is 45 ("1f4b0....+45") plus one 51-byte blob signature
        c.Check(gauges["arvados_keepweb_sessions_cached_collection_bytes//"].Value, check.Equals, float64(45+51))
@@ -434,7 +433,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
        ldr.Path = "-"
        arvCfg, err := ldr.Load()
        c.Check(err, check.IsNil)
-       cfg := newConfig(arvCfg)
+       cfg := newConfig(ctxlog.TestLogger(c), arvCfg)
        c.Assert(err, check.IsNil)
        cfg.Client = arvados.Client{
                APIHost:  testAPIHost,
diff --git a/services/keep-web/status_test.go b/services/keep-web/status_test.go
index 1b6ad93de7dd2207c97a56a8e6ff0365391b09b0..f90f85cd93691c1fa8ed389b6a48bb17d7e7c376 100644
@@ -11,11 +11,12 @@ import (
        "net/url"
 
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "gopkg.in/check.v1"
 )
 
 func (s *UnitSuite) TestStatus(c *check.C) {
-       h := handler{Config: newConfig(s.Config)}
+       h := handler{Config: newConfig(ctxlog.TestLogger(c), s.Config)}
        u, _ := url.Parse("http://keep-web.example/status.json")
        req := &http.Request{
                Method:     "GET",
diff --git a/tools/sync-groups/sync-groups.go b/tools/sync-groups/sync-groups.go
index 24e838c8f1ec64434a13652b36b18689ddb5a216..f0c377078358cde9981e45afa66a989171c0a27c 100644
@@ -119,6 +119,7 @@ type ConfigParams struct {
        Path            string
        UserID          string
        Verbose         bool
+       CaseInsensitive bool
        ParentGroupUUID string
        ParentGroupName string
        SysUserUUID     string
@@ -152,6 +153,10 @@ func ParseFlags(config *ConfigParams) error {
                "user-id",
                "email",
                "Attribute by which every user is identified. Valid values are: email and username.")
+       caseInsensitive := flags.Bool(
+               "case-insensitive",
+               false,
+               "Performs case insensitive matching on user IDs. Off by default.")
        verbose := flags.Bool(
                "verbose",
                false,
@@ -196,6 +201,7 @@ func ParseFlags(config *ConfigParams) error {
        config.ParentGroupUUID = *parentGroupUUID
        config.UserID = *userID
        config.Verbose = *verbose
+       config.CaseInsensitive = *caseInsensitive
 
        return nil
 }
@@ -299,7 +305,11 @@ func doMain(cfg *ConfigParams) error {
        }
        defer f.Close()
 
-       log.Printf("%s %s started. Using %q as users id and parent group UUID %q", os.Args[0], version, cfg.UserID, cfg.ParentGroupUUID)
+       iCaseLog := ""
+       if cfg.UserID == "username" && cfg.CaseInsensitive {
+               iCaseLog = " - username matching requested to be case-insensitive"
+       }
+       log.Printf("%s %s started. Using %q as users id and parent group UUID %q%s", os.Args[0], version, cfg.UserID, cfg.ParentGroupUUID, iCaseLog)
 
        // Get the complete user list to minimize API Server requests
        allUsers := make(map[string]arvados.User)
@@ -316,6 +326,12 @@ func doMain(cfg *ConfigParams) error {
                if err != nil {
                        return err
                }
+               if cfg.UserID == "username" && uID != "" && cfg.CaseInsensitive {
+                       uID = strings.ToLower(uID)
+                       if uuid, found := userIDToUUID[uID]; found {
+                               return fmt.Errorf("case insensitive collision for username %q between %q and %q", uID, u.UUID, uuid)
+                       }
+               }
                userIDToUUID[uID] = u.UUID
                if cfg.Verbose {
                        log.Printf("Seen user %q (%s)", u.Username, u.UUID)
@@ -415,6 +431,9 @@ func ProcessFile(
                        membersSkipped++
                        continue
                }
+               if cfg.UserID == "username" && cfg.CaseInsensitive {
+                       groupMember = strings.ToLower(groupMember)
+               }
                if !(groupPermission == "can_read" || groupPermission == "can_write" || groupPermission == "can_manage") {
                        log.Printf("Warning: 3rd field should be 'can_read', 'can_write' or 'can_manage'. Found: %q at line %d, skipping.", groupPermission, lineNo)
                        membersSkipped++
@@ -494,9 +513,7 @@ func GetAll(c *arvados.Client, res string, params arvados.ResourceListParams, pa
                if page.Len() == 0 {
                        break
                }
-               for _, i := range page.GetItems() {
-                       allItems = append(allItems, i)
-               }
+               allItems = append(allItems, page.GetItems()...)
                params.Offset += page.Len()
        }
        return allItems, nil
@@ -634,6 +651,9 @@ func GetRemoteGroups(cfg *ConfigParams, allUsers map[string]arvados.User) (remot
                        if err != nil {
                                return remoteGroups, groupNameToUUID, err
                        }
+                       if cfg.UserID == "username" && cfg.CaseInsensitive {
+                               memberID = strings.ToLower(memberID)
+                       }
                        membersSet[memberID] = u2gLinkSet[link.HeadUUID]
                }
                remoteGroups[group.UUID] = &GroupInfo{
@@ -714,9 +734,7 @@ func RemoveMemberLinksFromGroup(cfg *ConfigParams, user arvados.User, linkNames
                        userID, _ := GetUserID(user, cfg.UserID)
                        return fmt.Errorf("error getting links needed to remove user %q from group %q: %s", userID, group.Name, err)
                }
-               for _, link := range l {
-                       links = append(links, link)
-               }
+               links = append(links, l...)
        }
        for _, item := range links {
                link := item.(arvados.Link)
diff --git a/tools/sync-groups/sync-groups_test.go b/tools/sync-groups/sync-groups_test.go
index ec2f18a307d70c9767efcdef96574aa18d2cc862..69326c98d958cacd7709d24c47b9c63abd690b78 100644
@@ -50,6 +50,7 @@ func (s *TestSuite) SetUpTest(c *C) {
        os.Args = []string{"cmd", "somefile.csv"}
        config, err := GetConfig()
        c.Assert(err, IsNil)
+       config.UserID = "email"
        // Confirm that the parent group was created
        gl = arvados.GroupList{}
        ac.RequestAndDecode(&gl, "GET", "/arvados/v1/groups", nil, params)
@@ -145,10 +146,7 @@ func GroupMembershipExists(ac *arvados.Client, userUUID string, groupUUID string
                }},
        }
        ac.RequestAndDecode(&ll, "GET", "/arvados/v1/links", nil, params)
-       if ll.Len() != 1 {
-               return false
-       }
-       return true
+       return ll.Len() == 1
 }
 
 // If named group exists, return its UUID
@@ -189,11 +187,12 @@ func RemoteGroupExists(cfg *ConfigParams, groupName string) (uuid string, err er
 
 func (s *TestSuite) TestParseFlagsWithPositionalArgument(c *C) {
        cfg := ConfigParams{}
-       os.Args = []string{"cmd", "-verbose", "/tmp/somefile.csv"}
+       os.Args = []string{"cmd", "-verbose", "-case-insensitive", "/tmp/somefile.csv"}
        err := ParseFlags(&cfg)
        c.Assert(err, IsNil)
        c.Check(cfg.Path, Equals, "/tmp/somefile.csv")
        c.Check(cfg.Verbose, Equals, true)
+       c.Check(cfg.CaseInsensitive, Equals, true)
 }
 
 func (s *TestSuite) TestParseFlagsWithoutPositionalArgument(c *C) {
@@ -450,7 +449,7 @@ func (s *TestSuite) TestIgnoreNonexistantUsers(c *C) {
        c.Assert(GroupMembershipExists(s.cfg.Client, activeUserUUID, groupUUID, "can_write"), Equals, true)
 }
 
-// Users listed on the file that don't exist on the system are ignored
+// Entries with missing data are ignored.
 func (s *TestSuite) TestIgnoreEmptyFields(c *C) {
        activeUserEmail := s.users[arvadostest.ActiveUserUUID].Email
        activeUserUUID := s.users[arvadostest.ActiveUserUUID].UUID
@@ -502,7 +501,6 @@ func (s *TestSuite) TestUseUsernames(c *C) {
        s.cfg.Path = tmpfile.Name()
        s.cfg.UserID = "username"
        err = doMain(s.cfg)
-       s.cfg.UserID = "email"
        c.Assert(err, IsNil)
        // Confirm that memberships exist
        groupUUID, err = RemoteGroupExists(s.cfg, "TestGroup1")
@@ -510,3 +508,65 @@ func (s *TestSuite) TestUseUsernames(c *C) {
        c.Assert(groupUUID, Not(Equals), "")
        c.Assert(GroupMembershipExists(s.cfg.Client, activeUserUUID, groupUUID, "can_write"), Equals, true)
 }
+
+func (s *TestSuite) TestUseUsernamesWithCaseInsensitiveMatching(c *C) {
+       activeUserName := strings.ToUpper(s.users[arvadostest.ActiveUserUUID].Username)
+       activeUserUUID := s.users[arvadostest.ActiveUserUUID].UUID
+       // Confirm that group doesn't exist
+       groupUUID, err := RemoteGroupExists(s.cfg, "TestGroup1")
+       c.Assert(err, IsNil)
+       c.Assert(groupUUID, Equals, "")
+       // Create file & run command
+       data := [][]string{
+               {"TestGroup1", activeUserName},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name()) // clean up
+       s.cfg.Path = tmpfile.Name()
+       s.cfg.UserID = "username"
+       s.cfg.CaseInsensitive = true
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+       // Confirm that memberships exist
+       groupUUID, err = RemoteGroupExists(s.cfg, "TestGroup1")
+       c.Assert(err, IsNil)
+       c.Assert(groupUUID, Not(Equals), "")
+       c.Assert(GroupMembershipExists(s.cfg.Client, activeUserUUID, groupUUID, "can_write"), Equals, true)
+}
+
+func (s *TestSuite) TestUsernamesCaseInsensitiveCollision(c *C) {
+       activeUserName := s.users[arvadostest.ActiveUserUUID].Username
+       activeUserUUID := s.users[arvadostest.ActiveUserUUID].UUID
+
+       nu := arvados.User{}
+       nuUsername := strings.ToUpper(activeUserName)
+       err := s.cfg.Client.RequestAndDecode(&nu, "POST", "/arvados/v1/users", nil, map[string]interface{}{
+               "user": map[string]string{
+                       "username": nuUsername,
+               },
+       })
+       c.Assert(err, IsNil)
+
+       // Manually remove non-fixture user because /database/reset fails otherwise
+       defer s.cfg.Client.RequestAndDecode(nil, "DELETE", "/arvados/v1/users/"+nu.UUID, nil, nil)
+
+       c.Assert(nu.Username, Equals, nuUsername)
+       c.Assert(nu.UUID, Not(Equals), activeUserUUID)
+       c.Assert(nu.Username, Not(Equals), activeUserName)
+
+       data := [][]string{
+               {"SomeGroup", activeUserName},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name()) // clean up
+
+       s.cfg.Path = tmpfile.Name()
+       s.cfg.UserID = "username"
+       s.cfg.CaseInsensitive = true
+       err = doMain(s.cfg)
+       // Should get an error because of "ACTIVE" and "Active" usernames
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*case insensitive collision.*")
+}