# In each of the service sections below, the keys under
# InternalURLs are the endpoints where the service should be
- # listening, and reachable from other hosts in the cluster.
- SAMPLE:
- InternalURLs:
- "http://host1.example:12345": {}
- "http://host2.example:12345":
- # Rendezvous is normally empty/omitted. When changing the
- # URL of a Keepstore service, Rendezvous should be set to
- # the old URL (with trailing slash omitted) to preserve
- # rendezvous ordering.
- Rendezvous: ""
- SAMPLE:
- Rendezvous: ""
- ExternalURL: "-"
+ # listening, and reachable from other hosts in the
+ # cluster. Example:
+ #
+ # InternalURLs:
+ # "http://host1.example:12345": {}
+ # "http://host2.example:12345": {}
RailsAPI:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
Controller:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Websocket:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Keepbalance:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
GitHTTP:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
GitSSH:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
DispatchCloud:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
SSO:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Keepproxy:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
WebDAV:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
# Base URL for Workbench inline preview. If blank, use
# WebDAVDownload instead, and disable inline preview.
# If both are empty, downloading collections from workbench
ExternalURL: ""
WebDAVDownload:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
# Base URL for download links. If blank, serve links to WebDAV
# with disposition=attachment query param. Unlike preview links,
# browsers do not render attachments, so there is no risk of XSS.
ExternalURL: ""
Keepstore:
- InternalURLs: {}
+ InternalURLs:
+ SAMPLE:
+ # Rendezvous is normally empty/omitted. When changing the
+ # URL of a Keepstore service, Rendezvous should be set to
+ # the old URL (with trailing slash omitted) to preserve
+ # rendezvous ordering.
+ Rendezvous: ""
ExternalURL: "-"
Composer:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
WebShell:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
# ShellInABox service endpoint URL for a given VM. If empty, do not
# offer web shell logins.
#
# https://*.webshell.uuid_prefix.arvadosapi.com
ExternalURL: ""
Workbench1:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Workbench2:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Health:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
PostgreSQL:
# In each of the service sections below, the keys under
# InternalURLs are the endpoints where the service should be
- # listening, and reachable from other hosts in the cluster.
- SAMPLE:
- InternalURLs:
- "http://host1.example:12345": {}
- "http://host2.example:12345":
- # Rendezvous is normally empty/omitted. When changing the
- # URL of a Keepstore service, Rendezvous should be set to
- # the old URL (with trailing slash omitted) to preserve
- # rendezvous ordering.
- Rendezvous: ""
- SAMPLE:
- Rendezvous: ""
- ExternalURL: "-"
+ # listening, and reachable from other hosts in the
+ # cluster. Example:
+ #
+ # InternalURLs:
+ # "http://host1.example:12345": {}
+ # "http://host2.example:12345": {}
RailsAPI:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
Controller:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Websocket:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Keepbalance:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
GitHTTP:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
GitSSH:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
DispatchCloud:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
SSO:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Keepproxy:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
WebDAV:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
# Base URL for Workbench inline preview. If blank, use
# WebDAVDownload instead, and disable inline preview.
# If both are empty, downloading collections from workbench
ExternalURL: ""
WebDAVDownload:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
# Base URL for download links. If blank, serve links to WebDAV
# with disposition=attachment query param. Unlike preview links,
# browsers do not render attachments, so there is no risk of XSS.
ExternalURL: ""
Keepstore:
- InternalURLs: {}
+ InternalURLs:
+ SAMPLE:
+ # Rendezvous is normally empty/omitted. When changing the
+ # URL of a Keepstore service, Rendezvous should be set to
+ # the old URL (with trailing slash omitted) to preserve
+ # rendezvous ordering.
+ Rendezvous: ""
ExternalURL: "-"
Composer:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
WebShell:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
# ShellInABox service endpoint URL for a given VM. If empty, do not
# offer web shell logins.
#
# https://*.webshell.uuid_prefix.arvadosapi.com
ExternalURL: ""
Workbench1:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Workbench2:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: ""
Health:
- InternalURLs: {}
+ InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
PostgreSQL:
ldr.configdata = buf
}
+ // FIXME: We should reject YAML if the same key is used twice
+ // in a map/object, like {foo: bar, foo: baz}. Maybe we'll get
+ // this fixed free when we upgrade ghodss/yaml to a version
+ // that uses go-yaml v3.
+
// Load the config into a dummy map to get the cluster ID
// keys, discarding the values; then set up defaults for each
// cluster ID; then load the real config on top of the
checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
ldr.checkEmptyKeepstores(cc),
ldr.checkUnlistedKeepstores(cc),
+ // TODO: check non-empty Rendezvous on
+ // services other than Keepstore
} {
if err != nil {
return nil, err
if ldr.Logger == nil {
return
}
- allowed := map[string]interface{}{}
- for k, v := range expected {
- allowed[strings.ToLower(k)] = v
- }
for k, vsupp := range supplied {
if k == "SAMPLE" {
// entry will be dropped in removeSampleKeys anyway
continue
}
- vexp, ok := allowed[strings.ToLower(k)]
+ vexp, ok := expected[k]
if expected["SAMPLE"] != nil {
+ // use the SAMPLE entry's keys as the
+ // "expected" map when checking vsupp
+ // recursively.
vexp = expected["SAMPLE"]
} else if !ok {
- ldr.Logger.Warnf("deprecated or unknown config entry: %s%s", prefix, k)
+ // check for a case-insensitive match
+ hint := ""
+ for ek := range expected {
+ if strings.EqualFold(k, ek) {
+ hint = " (perhaps you meant " + ek + "?)"
+ // If we don't delete this, it
+ // will end up getting merged,
+ // unpredictably
+ // merging/overriding the
+ // default.
+ delete(supplied, k)
+ break
+ }
+ }
+ ldr.Logger.Warnf("deprecated or unknown config entry: %s%s%s", prefix, k, hint)
continue
}
if vsupp, ok := vsupp.(map[string]interface{}); !ok {
SystemRootToken: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
Collections:
BlobSigningKey: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
- postgresql: {}
- BadKey: {}
- Containers: {}
+ PostgreSQL: {}
+ BadKey1: {}
+ Containers:
+ RunTimeEngine: abc
RemoteClusters:
z2222:
Host: z2222.arvadosapi.com
Proxy: true
- BadKey: badValue
+ BadKey2: badValue
+ Services:
+ KeepStore:
+ InternalURLs:
+ "http://host.example:12345": {}
+ Keepstore:
+ InternalURLs:
+ "http://host.example:12345":
+ RendezVous: x
+ ServiceS:
+ Keepstore:
+ InternalURLs:
+ "http://host.example:12345": {}
+ Volumes:
+ zzzzz-nyw5e-aaaaaaaaaaaaaaa: {}
`, &logbuf).Load()
c.Assert(err, check.IsNil)
+ c.Log(logbuf.String())
logs := strings.Split(strings.TrimSuffix(logbuf.String(), "\n"), "\n")
for _, log := range logs {
- c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*BadKey.*`)
+ c.Check(log, check.Matches, `.*deprecated or unknown config entry:.*(RunTimeEngine.*RuntimeEngine|BadKey1|BadKey2|KeepStore|ServiceS|RendezVous).*`)
}
- c.Check(logs, check.HasLen, 2)
+ c.Check(logs, check.HasLen, 6)
}
func (s *LoadSuite) checkSAMPLEKeys(c *check.C, path string, x interface{}) {
_, err := testLoader(c, `
Clusters:
zzzzz:
- postgresql:
- connection:
+ PostgreSQL:
+ Connection:
DBName: dbname
Host: host
`, nil).Load()
ErrIsDirectory = errors.New("cannot rename file to overwrite existing directory")
ErrNotADirectory = errors.New("not a directory")
ErrPermission = os.ErrPermission
+ DebugLocksPanicMode = false
)
type syncer interface {
Sync() error
}
+func debugPanicIfNotLocked(l sync.Locker) {
+ if !DebugLocksPanicMode {
+ return
+ }
+ race := false
+ go func() {
+ l.Lock()
+ race = true
+ l.Unlock()
+ }()
+ time.Sleep(10)
+ if race {
+ panic("bug: caller-must-have-lock func called, but nobody has lock")
+ }
+}
+
// A File is an *os.File-like interface for reading and writing files
// in a FileSystem.
type File interface {
}
func (n *treenode) Child(name string, replace func(inode) (inode, error)) (child inode, err error) {
+ debugPanicIfNotLocked(n)
child = n.inodes[name]
if name == "" || name == "." || name == ".." {
err = ErrInvalidArgument
func (n *treenode) MemorySize() (size int64) {
n.RLock()
defer n.RUnlock()
+ debugPanicIfNotLocked(n)
for _, inode := range n.inodes {
size += inode.MemorySize()
}
node = node.Parent()
continue
}
+ modtime := node.Parent().FileInfo().ModTime()
+ node.Lock()
+ locked := node
node, err = node.Child(name, func(child inode) (inode, error) {
if child == nil {
- child, err := node.FS().newNode(name, 0755|os.ModeDir, node.Parent().FileInfo().ModTime())
+ child, err := node.FS().newNode(name, 0755|os.ModeDir, modtime)
if err != nil {
return nil, err
}
return child, nil
}
})
+ locked.Unlock()
if err != nil {
return
}
err = fmt.Errorf("invalid file part %q in path %q", basename, path)
return
}
+ modtime := node.FileInfo().ModTime()
+ node.Lock()
+ defer node.Unlock()
_, err = node.Child(basename, func(child inode) (inode, error) {
switch child := child.(type) {
case nil:
- child, err = node.FS().newNode(basename, 0755, node.FileInfo().ModTime())
+ child, err = node.FS().newNode(basename, 0755, modtime)
if err != nil {
return nil, err
}
}
func (fs *customFileSystem) MountByID(mount string) {
+ fs.root.treenode.Lock()
+ defer fs.root.treenode.Unlock()
fs.root.treenode.Child(mount, func(inode) (inode, error) {
return &vdirnode{
treenode: treenode{
}
func (fs *customFileSystem) MountProject(mount, uuid string) {
+ fs.root.treenode.Lock()
+ defer fs.root.treenode.Unlock()
fs.root.treenode.Child(mount, func(inode) (inode, error) {
return fs.newProjectNode(fs.root, mount, uuid), nil
})
}
func (fs *customFileSystem) MountUsers(mount string) {
+ fs.root.treenode.Lock()
+ defer fs.root.treenode.Unlock()
fs.root.treenode.Child(mount, func(inode) (inode, error) {
return &lookupnode{
stale: fs.Stale,
var _ = check.Suite(&SiteFSSuite{})
+func init() {
+ // Enable DebugLocksPanicMode sometimes. Don't enable it all
+ // the time, though -- it adds many calls to time.Sleep(),
+ // which could hide different bugs.
+ if time.Now().Seconds()&1 == 0 {
+ DebugLocksPanicMode = true
+ }
+}
+
type SiteFSSuite struct {
client *Client
fs CustomFileSystem
""")
_group.add_argument('--no-follow-links', action='store_false', dest='follow_links',
help="""
-Do not follow file and directory symlinks.
+Ignore file and directory symlinks. Even paths given explicitly on the
+command line will be skipped if they are symlinks.
""")
args.paths = ["-" if x == "/dev/stdin" else x for x in args.paths]
- if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
- if args.filename:
- arg_parser.error("""
+ if args.filename and (len(args.paths) != 1 or os.path.isdir(args.paths[0])):
+ arg_parser.error("""
--filename argument cannot be used when storing a directory or
multiple files.
""")
self._write_stdin(self.filename or 'stdin')
elif not os.path.exists(path):
raise PathDoesNotExistError(u"file or directory '{}' does not exist.".format(path))
+ elif (not self.follow_links) and os.path.islink(path):
+ self.logger.warning("Skipping symlink '{}'".format(path))
+ continue
elif os.path.isdir(path):
# Use absolute paths on cache index so CWD doesn't interfere
# with the caching logic.
self._remote_collection.save(num_retries=self.num_retries,
trash_at=self._collection_trash_at())
else:
+ if len(self._local_collection) == 0:
+ self.logger.warning("No files were uploaded, skipping collection creation.")
+ return
self._local_collection.save_new(
name=self.name, owner_uuid=self.owner_uuid,
ensure_unique_name=self.ensure_unique_name,
output = writer.manifest_text()
elif args.raw:
output = ','.join(writer.data_locators())
- else:
+ elif writer.manifest_locator() is not None:
try:
expiration_notice = ""
if writer.collection_trash_at() is not None:
"arv-put: Error creating Collection on project: {}.".format(
error))
status = 1
+ else:
+ status = 1
# Print the locator (uuid) of the new collection.
if output is None:
shutil.rmtree(self.tempdir_with_symlink)
def test_symlinks_are_followed_by_default(self):
+ self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir')))
+ self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile')))
cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
cwriter.start(save_collection=False)
self.assertIn('linkeddir', cwriter.manifest_text())
cwriter.destroy_cache()
def test_symlinks_are_not_followed_when_requested(self):
+ self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir')))
+ self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkedfile')))
cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
follow_links=False)
cwriter.start(save_collection=False)
self.assertNotIn('linkeddir', cwriter.manifest_text())
self.assertNotIn('linkedfile', cwriter.manifest_text())
cwriter.destroy_cache()
+ # Check for bug #17800: passed symlinks should also be ignored.
+ linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir')
+ cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False)
+ cwriter.start(save_collection=False)
+ self.assertNotIn('linkeddir', cwriter.manifest_text())
+ cwriter.destroy_cache()
+
+ def test_no_empty_collection_saved(self):
+ self.assertTrue(os.path.islink(os.path.join(self.tempdir_with_symlink, 'linkeddir')))
+ linked_dir = os.path.join(self.tempdir_with_symlink, 'linkeddir')
+ cwriter = arv_put.ArvPutUploadJob([linked_dir], follow_links=False)
+ cwriter.start(save_collection=True)
+ self.assertIsNone(cwriter.manifest_locator())
+ self.assertEqual('', cwriter.manifest_text())
+ cwriter.destroy_cache()
def test_passing_nonexistant_path_raise_exception(self):
uuid_str = str(uuid.uuid4())
var _ = check.Suite(&UnitSuite{})
+func init() {
+ arvados.DebugLocksPanicMode = true
+}
+
type UnitSuite struct {
Config *arvados.Config
}
}
}
- normalizedURL := *r.URL
- normalizedURL.RawPath = ""
- normalizedURL.Path = reMultipleSlashChars.ReplaceAllString(normalizedURL.Path, "/")
- ctxlog.FromContext(r.Context()).Infof("escapedPath %s", normalizedURL.EscapedPath())
- canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedURL.EscapedPath(), s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
+ normalizedPath := normalizePath(r.URL.Path)
+ ctxlog.FromContext(r.Context()).Debugf("normalizedPath %q", normalizedPath)
+ canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, normalizedPath, s3querystring(r.URL), canonicalHeaders, signedHeaders, r.Header.Get("X-Amz-Content-Sha256"))
ctxlog.FromContext(r.Context()).Debugf("s3stringToSign: canonicalRequest %s", canonicalRequest)
return fmt.Sprintf("%s\n%s\n%s\n%s", alg, r.Header.Get("X-Amz-Date"), scope, hashdigest(sha256.New(), canonicalRequest)), nil
}
+func normalizePath(s string) string {
+ // (url.URL).EscapedPath() would be incorrect here. AWS
+ // documentation specifies the URL path should be normalized
+ // according to RFC 3986, i.e., unescaping ALPHA / DIGIT / "-"
+ // / "." / "_" / "~". The implication is that everything other
+ // than those chars (and "/") _must_ be percent-encoded --
+ // even chars like ";" and "," that are not normally
+ // percent-encoded in paths.
+ out := ""
+ for _, c := range []byte(reMultipleSlashChars.ReplaceAllString(s, "/")) {
+ if (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ c == '-' ||
+ c == '.' ||
+ c == '_' ||
+ c == '~' ||
+ c == '/' {
+ out += string(c)
+ } else {
+ out += fmt.Sprintf("%%%02X", c)
+ }
+ }
+ return out
+}
+
func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string, error) {
// scope is {datestamp}/{region}/{service}/aws4_request
drs := strings.Split(scope, "/")
rawPath string
normalizedPath string
}{
- {"/foo", "/foo"}, // boring case
- {"/foo%5fbar", "/foo_bar"}, // _ must not be escaped
- {"/foo%2fbar", "/foo/bar"}, // / must not be escaped
- {"/(foo)", "/%28foo%29"}, // () must be escaped
- {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase
+ {"/foo", "/foo"}, // boring case
+ {"/foo%5fbar", "/foo_bar"}, // _ must not be escaped
+ {"/foo%2fbar", "/foo/bar"}, // / must not be escaped
+ {"/(foo)/[];,", "/%28foo%29/%5B%5D%3B%2C"}, // ()[];, must be escaped
+ {"/foo%5bbar", "/foo%5Bbar"}, // %XX must be uppercase
+ {"//foo///.bar", "/foo/.bar"}, // "//" and "///" must be squashed to "/"
} {
+ c.Logf("trial %q", trial)
+
date := time.Now().UTC().Format("20060102T150405Z")
scope := "20200202/zzzzz/S3/aws4_request"
canonicalRequest := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", "GET", trial.normalizedPath, "", "host:host.example.com\n", "host", "")
buf, err := cmd.CombinedOutput()
c.Check(err, check.IsNil)
c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
+
+ // This tests whether s3cmd's path normalization agrees with
+ // keep-web's signature verification wrt chars like "|"
+ // (neither reserved nor unreserved) and "," (not normally
+ // percent-encoded in a path).
+ cmd = exec.Command("s3cmd", "--no-ssl", "--host="+s.testServer.Addr, "--host-bucket="+s.testServer.Addr, "--access_key="+arvadostest.ActiveTokenUUID, "--secret_key="+arvadostest.ActiveToken, "get", "s3://"+arvadostest.FooCollection+"/foo,;$[|]bar")
+ buf, err = cmd.CombinedOutput()
+ c.Check(err, check.NotNil)
+ c.Check(string(buf), check.Matches, `(?ms).*NoSuchKey.*\n`)
}
func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
# The arvados-formula version. For a stable release, this should be a
# branch name (e.g. X.Y-dev) or tag for the release.
-ARVADOS_TAG="master"
+ARVADOS_TAG="main"
# Other formula versions we depend on
POSTGRES_TAG="v0.41.6"