end
test 'view file' do
+ need_selenium "phantomjs does not follow redirects reliably, maybe https://github.com/ariya/phantomjs/issues/10389"
magic = rand(2**512).to_s 36
owner = api_fixture('groups')['anonymously_accessible_project']['uuid']
col = upload_data_and_get_collection(magic, 'admin', "Hello\\040world.txt", owner)
end
test "can download an entire collection with a reader token" do
+ need_selenium "phantomjs does not follow redirects reliably, maybe https://github.com/ariya/phantomjs/issues/10389"
+
token = api_token('active')
data = "foo\nfile\n"
datablock = `echo -n #{data.shellescape} | ARVADOS_API_TOKEN=#{token.shellescape} arv-put --no-progress --raw -`.strip
token = api_fixture('api_client_authorizations')['active_all_collections']['api_token']
url_head = "/collections/download/#{uuid}/#{token}/"
visit url_head
+ assert_text "You can download individual files listed below"
# It seems that Capybara can't inspect tags outside the body, so this is
# a very blunt approach.
assert_no_match(/<\s*meta[^>]+\bnofollow\b/i, page.html,
"wget prohibited from recursing the collection page")
# Look at all the links that wget would recurse through using our
# recommended options, and check that it's exactly the file list.
- hrefs = page.all('a').map do |anchor|
- link = anchor[:href] || ''
- if link.start_with? url_head
- link[url_head.size .. -1]
- elsif link.start_with? '/'
- nil
- else
- link
- end
- end
- assert_equal(['./foo'], hrefs.compact.sort,
- "download page did provide strictly file links")
+ hrefs = []
+ page.html.scan(/href="(.*?)"/) { |m| hrefs << m[0] }
+ assert_equal(['./foo'], hrefs, "download page did provide strictly file links")
click_link "foo"
assert_text "foo\nfile\n"
end
fi
if [[ -n "$(find $WORKSPACE/packages/$TARGET -name '*.deb')" ]] ; then
+ set +e
+ /usr/bin/which dpkg-scanpackages >/dev/null
+ if [[ "$?" != "0" ]]; then
+ echo >&2
+ echo >&2 "Error: please install dpkg-dev. E.g. sudo apt-get install dpkg-dev"
+ echo >&2
+ exit 1
+ fi
+ /usr/bin/which apt-ftparchive >/dev/null
+ if [[ "$?" != "0" ]]; then
+ echo >&2
+ echo >&2 "Error: please install apt-utils. E.g. sudo apt-get install apt-utils"
+ echo >&2
+ exit 1
+ fi
+ set -e
(cd $WORKSPACE/packages/$TARGET
dpkg-scanpackages . 2> >(grep -v 'warning' 1>&2) | tee Packages | gzip -c > Packages.gz
apt-ftparchive -o APT::FTPArchive::Release::Origin=Arvados release . > Release
echo "== Interactive commands:"
echo "TARGET (short for 'test DIR')"
echo "test TARGET"
+ echo "10 test TARGET (run test 10 times)"
echo "test TARGET:py3 (test with python3)"
echo "test TARGET -check.vv (pass arguments to test)"
echo "install TARGET"
while read -p 'What next? ' -e -i "$nextcmd" nextcmd; do
history -s "$nextcmd"
history -w
+ count=1
+ if [[ "${nextcmd}" =~ ^[0-9] ]]; then
+ read count nextcmd <<<"${nextcmd}"
+ fi
read verb target opts <<<"${nextcmd}"
target="${target%/}"
target="${target/\/:/:}"
${verb}_${target}
;;
*)
- argstarget=${target%:py3}
+ argstarget=${target%:py3}
testargs["$argstarget"]="${opts}"
tt="${testfuncargs[${target}]}"
tt="${tt:-$target}"
- do_$verb $tt
+ while [ $count -gt 0 ]; do
+ do_$verb $tt
+ let "count=count-1"
+ done
;;
esac
;;
After migrating your configuration, uninstall the @arvados-sso-provider@ package.
+h3. S3 signatures
+
+Keepstore now uses "V4 signatures":https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html by default for S3 requests. If you are using Amazon S3, no action is needed; all regions support V4 signatures. If you are using a different S3-compatible service that does not support V4 signatures, add @V2Signature: true@ to your volume driver parameters to preserve the old behavior. See "configuring S3 object storage":{{site.baseurl}}/install/configure-s3-object-storage.html.
+
h2(#v2_0_0). v2.0.0 (2020-02-07)
"Upgrading from 1.4":#v1_4_1
# declaration.
LocationConstraint: false
+ # Use V2 signatures instead of the default V4. Amazon S3
+ # supports V4 signatures in all regions, but this option
+ # might be needed for other S3-compatible services.
+ V2Signature: false
+
# Requested page size for "list bucket contents" requests.
IndexPageSize: 1000
Region: us-east-1a
Bucket: aaaaa
LocationConstraint: false
+ V2Signature: false
IndexPageSize: 1000
ConnectTimeout: 1m
ReadTimeout: 10m
Region: us-east-1a
Bucket: aaaaa
LocationConstraint: false
+ V2Signature: false
IndexPageSize: 1000
ConnectTimeout: 1m
ReadTimeout: 10m
}
defer func() {
cmd.Process.Signal(syscall.SIGTERM)
- logger.Infof("sent SIGTERM; waiting for postgres to shut down")
+ logger.Info("sent SIGTERM; waiting for postgres to shut down")
cmd.Wait()
}()
- for deadline := time.Now().Add(10 * time.Second); ; {
- output, err2 := exec.Command("pg_isready").CombinedOutput()
- if err2 == nil {
- break
- } else if time.Now().After(deadline) {
- err = fmt.Errorf("timed out waiting for pg_isready (%q)", output)
- return 1
- } else {
- time.Sleep(time.Second)
- }
+ err = waitPostgreSQLReady()
+ if err != nil {
+ return 1
}
}
// might never have been run.
}
+	// needcoll collects the collation names that the probe below
+	// did not find in pg_catalog.pg_collation.
+	var needcoll []string
+	// If the en_US.UTF-8 locale wasn't installed when
+	// postgresql initdb ran, it needs to be added
+	// explicitly before we can use it in our test suite.
+	for _, collname := range []string{"en_US", "en_US.UTF-8"} {
+		cmd := exec.Command("sudo", "-u", "postgres", "psql", "-t", "-c", "SELECT 1 FROM pg_catalog.pg_collation WHERE collname='"+collname+"' AND collcollate IN ('en_US.UTF-8', 'en_US.utf8')")
+		cmd.Dir = "/"
+		out, err2 := cmd.CombinedOutput()
+		// Check the error returned by this psql invocation
+		// (err2), not the outer err: otherwise a failed probe
+		// goes unnoticed and its combined stdout/stderr text
+		// is searched for "1" below as if it were a query
+		// result.
+		if err2 != nil {
+			err = fmt.Errorf("error while checking postgresql collations: %s", err2)
+			return 1
+		}
+		if strings.Contains(string(out), "1") {
+			logger.Infof("postgresql supports collation %s", collname)
+		} else {
+			needcoll = append(needcoll, collname)
+		}
+	}
+ if len(needcoll) > 0 && os.Getpid() != 1 {
+ // In order for the CREATE COLLATION statement
+ // below to work, the locale must have existed
+ // when PostgreSQL started up. If we're
+ // running as init, we must have started
+ // PostgreSQL ourselves after installing the
+ // locales. Otherwise, it might need a
+ // restart, so we attempt to restart it with
+ // systemd.
+ if err = runBash(`sudo systemctl restart postgresql`, stdout, stderr); err != nil {
+ logger.Warn("`systemctl restart postgresql` failed; hoping postgresql does not need to be restarted")
+ } else if err = waitPostgreSQLReady(); err != nil {
+ return 1
+ }
+ }
+ for _, collname := range needcoll {
+ cmd := exec.Command("sudo", "-u", "postgres", "psql", "-c", "CREATE COLLATION \""+collname+"\" (LOCALE = \"en_US.UTF-8\")")
+ cmd.Stdout = stdout
+ cmd.Stderr = stderr
+ cmd.Dir = "/"
+ err = cmd.Run()
+ if err != nil {
+ err = fmt.Errorf("error adding postgresql collation %s: %s", collname, err)
+ return 1
+ }
+ }
+
withstuff := "WITH LOGIN SUPERUSER ENCRYPTED PASSWORD " + pq.QuoteLiteral(devtestDatabasePassword)
cmd := exec.Command("sudo", "-u", "postgres", "psql", "-c", "ALTER ROLE arvados "+withstuff)
cmd.Dir = "/"
return osv, nil
}
+// waitPostgreSQLReady runs pg_isready repeatedly, sleeping one second
+// after each failed attempt, until it exits successfully. If an
+// attempt fails after the 10-second deadline has passed, it returns
+// an error that includes the output of that last attempt.
+func waitPostgreSQLReady() error {
+	deadline := time.Now().Add(10 * time.Second)
+	for {
+		output, err := exec.Command("pg_isready").CombinedOutput()
+		if err == nil {
+			return nil
+		}
+		if time.Now().After(deadline) {
+			return fmt.Errorf("timed out waiting for pg_isready (%q)", output)
+		}
+		time.Sleep(time.Second)
+	}
+}
+
func runBash(script string, stdout, stderr io.Writer) error {
cmd := exec.Command("bash", "-")
cmd.Stdin = bytes.NewBufferString("set -ex -o pipefail\n" + script)
}
type S3VolumeDriverParameters struct {
+ IAMRole string
AccessKey string
SecretKey string
Endpoint string
Region string
Bucket string
LocationConstraint bool
+ V2Signature bool
IndexPageSize int
ConnectTimeout Duration
ReadTimeout Duration
curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
if self.insecure:
curl.setopt(pycurl.SSL_VERIFYPEER, 0)
+ else:
+ curl.setopt(pycurl.CAINFO, arvados.util.ca_certs_path())
if method == "HEAD":
curl.setopt(pycurl.NOBODY, True)
self._setcurltimeouts(curl, timeout, method=="HEAD")
curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
if self.insecure:
curl.setopt(pycurl.SSL_VERIFYPEER, 0)
+ else:
+ curl.setopt(pycurl.CAINFO, arvados.util.ca_certs_path())
self._setcurltimeouts(curl, timeout)
try:
curl.perform()
it returns the value of `fallback` (httplib2's CA certs by default).
"""
for ca_certs_path in [
+ # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
+ # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
+ os.environ.get('SSL_CERT_FILE'),
# Arvados specific:
'/etc/arvados/ca-certificates.crt',
# Debian:
# Red Hat:
'/etc/pki/tls/certs/ca-bundle.crt',
]:
- if os.path.exists(ca_certs_path):
+ if ca_certs_path and os.path.exists(ca_certs_path):
return ca_certs_path
return fallback
dbcfg.declare_config "PostgreSQL.Connection.dbname", String, :database
dbcfg.declare_config "PostgreSQL.Connection.template", String, :template
dbcfg.declare_config "PostgreSQL.Connection.encoding", String, :encoding
+dbcfg.declare_config "PostgreSQL.Connection.collation", String, :collation
application_config = {}
%w(application.default application).each do |cfgfile|
# Use template0 when creating a new database. Avoids
# character-encoding/collation problems.
$arvados_config["PostgreSQL"]["Connection"]["template"] = "template0"
+ # Some test cases depend on en_US.UTF-8 collation.
+ $arvados_config["PostgreSQL"]["Connection"]["collation"] = "en_US.UTF-8"
end
if $arvados_config["PostgreSQL"]["Connection"]["password"].empty?
"#{dbhost}/#{$arvados_config["PostgreSQL"]["Connection"]["dbname"]}?"+
"template=#{$arvados_config["PostgreSQL"]["Connection"]["template"]}&"+
"encoding=#{$arvados_config["PostgreSQL"]["Connection"]["client_encoding"]}&"+
+ "collation=#{$arvados_config["PostgreSQL"]["Connection"]["collation"]}&"+
"pool=#{$arvados_config["PostgreSQL"]["ConnectionPool"]}"
Server::Application.configure do
// S3Volume implements Volume using an S3 bucket.
type S3Volume struct {
- AccessKey string
- SecretKey string
- AuthToken string // populated automatically when IAMRole is used
- AuthExpiration time.Time // populated automatically when IAMRole is used
- IAMRole string
- Endpoint string
- Region string
- Bucket string
- LocationConstraint bool
- IndexPageSize int
- ConnectTimeout arvados.Duration
- ReadTimeout arvados.Duration
- RaceWindow arvados.Duration
- UnsafeDelete bool
+ arvados.S3VolumeDriverParameters
+ AuthToken string // populated automatically when IAMRole is used
+ AuthExpiration time.Time // populated automatically when IAMRole is used
cluster *arvados.Cluster
volume arvados.Volume
func (v *S3Volume) newS3Client() *s3.S3 {
auth := aws.NewAuth(v.AccessKey, v.SecretKey, v.AuthToken, v.AuthExpiration)
client := s3.New(*auth, v.region)
- if v.region.EC2Endpoint.Signer == aws.V4Signature {
- // Currently affects only eu-central-1
+ if !v.V2Signature {
client.Signature = aws.V4Signature
}
client.ConnectTimeout = time.Duration(v.ConnectTimeout)
}
}
+// TestSignatureVersion points an S3Volume at a stub HTTP server that
+// remembers the headers of the most recent request, then checks the
+// Authorization header sent for a Put: "AWS4-HMAC-SHA256 ..." when
+// V2Signature is left unset, and "AWS xxx:..." when V2Signature is
+// true.
+func (s *StubbedS3Suite) TestSignatureVersion(c *check.C) {
+	var header http.Header
+	stub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		header = r.Header
+	}))
+	defer stub.Close()
+
+	for _, trial := range []struct {
+		v2Signature bool
+		authPattern string
+	}{
+		// Default V4 signature
+		{false, `AWS4-HMAC-SHA256 .*`},
+		// Force V2 signature
+		{true, `AWS xxx:.*`},
+	} {
+		vol := S3Volume{
+			S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
+				AccessKey:   "xxx",
+				SecretKey:   "xxx",
+				Endpoint:    stub.URL,
+				Region:      "test-region-1",
+				Bucket:      "test-bucket-name",
+				V2Signature: trial.v2Signature,
+			},
+			cluster: s.cluster,
+			logger:  ctxlog.TestLogger(c),
+			metrics: newVolumeMetricsVecs(prometheus.NewRegistry()),
+		}
+		err := vol.check()
+		c.Check(err, check.IsNil)
+		err = vol.Put(context.Background(), "acbd18db4cc2f85cedef654fccc4a4d8", []byte("foo"))
+		c.Check(err, check.IsNil)
+		c.Check(header.Get("Authorization"), check.Matches, trial.authPattern)
+	}
+}
+
func (s *StubbedS3Suite) TestIAMRoleCredentials(c *check.C) {
s.metadata = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
upd := time.Now().UTC().Add(-time.Hour).Format(time.RFC3339)
w.WriteHeader(http.StatusNotFound)
}))
deadv := &S3Volume{
- IAMRole: s.metadata.URL + "/fake-metadata/test-role",
- Endpoint: "http://localhost:12345",
- Region: "test-region-1",
- Bucket: "test-bucket-name",
- cluster: s.cluster,
- logger: ctxlog.TestLogger(c),
- metrics: newVolumeMetricsVecs(prometheus.NewRegistry()),
+ S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
+ IAMRole: s.metadata.URL + "/fake-metadata/test-role",
+ Endpoint: "http://localhost:12345",
+ Region: "test-region-1",
+ Bucket: "test-bucket-name",
+ },
+ cluster: s.cluster,
+ logger: ctxlog.TestLogger(c),
+ metrics: newVolumeMetricsVecs(prometheus.NewRegistry()),
}
err := deadv.check()
c.Check(err, check.ErrorMatches, `.*/fake-metadata/test-role.*`)
v := &TestableS3Volume{
S3Volume: &S3Volume{
- AccessKey: accessKey,
- SecretKey: secretKey,
- IAMRole: iamRole,
- Bucket: TestBucketName,
- Endpoint: endpoint,
- Region: "test-region-1",
- LocationConstraint: true,
- UnsafeDelete: true,
- IndexPageSize: 1000,
- cluster: cluster,
- volume: volume,
- logger: ctxlog.TestLogger(c),
- metrics: metrics,
+ S3VolumeDriverParameters: arvados.S3VolumeDriverParameters{
+ IAMRole: iamRole,
+ AccessKey: accessKey,
+ SecretKey: secretKey,
+ Bucket: TestBucketName,
+ Endpoint: endpoint,
+ Region: "test-region-1",
+ LocationConstraint: true,
+ UnsafeDelete: true,
+ IndexPageSize: 1000,
+ },
+ cluster: cluster,
+ volume: volume,
+ logger: ctxlog.TestLogger(c),
+ metrics: metrics,
},
c: c,
server: srv,
return e
}
defer v.unlockfile(f)
- ts := syscall.NsecToTimespec(time.Now().UnixNano())
+ ts := time.Now()
v.os.stats.TickOps("utimes")
v.os.stats.Tick(&v.os.stats.UtimesOps)
- err = syscall.UtimesNano(p, []syscall.Timespec{ts, ts})
+ err = os.Chtimes(p, ts, ts)
v.os.stats.TickErr(err)
return err
}
v.os.Remove(tmpfile.Name())
return err
}
+ // ext4 uses a low-precision clock and effectively backdates
+ // files by up to 10 ms, sometimes across a 1-second boundary,
+ // which produces confusing results in logs and tests. We
+ // avoid this by setting the output file's timestamps
+ // explicitly, using a higher resolution clock.
+ ts := time.Now()
+ v.os.stats.TickOps("utimes")
+ v.os.stats.Tick(&v.os.stats.UtimesOps)
+ if err = os.Chtimes(tmpfile.Name(), ts, ts); err != nil {
+ err = fmt.Errorf("error setting timestamps on %s: %s", tmpfile.Name(), err)
+ v.os.Remove(tmpfile.Name())
+ return err
+ }
if err := v.os.Rename(tmpfile.Name(), bpath); err != nil {
err = fmt.Errorf("error renaming %s to %s: %s", tmpfile.Name(), bpath, err)
v.os.Remove(tmpfile.Name())
c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
- c.Check(stats(), check.Matches, `.*"UtimesOps":0,.*`)
+ c.Check(stats(), check.Matches, `.*"UtimesOps":1,.*`)
err = vol.Touch(loc)
c.Check(err, check.IsNil)
c.Check(stats(), check.Matches, `.*"FlockOps":1,.*`)
c.Check(stats(), check.Matches, `.*"OpenOps":1,.*`)
- c.Check(stats(), check.Matches, `.*"UtimesOps":1,.*`)
+ c.Check(stats(), check.Matches, `.*"UtimesOps":2,.*`)
_, err = vol.Get(context.Background(), loc, make([]byte, 3))
c.Check(err, check.IsNil)