Merge branch '16314-testuserdb'
authorTom Clegg <tom@tomclegg.ca>
Fri, 21 Aug 2020 20:39:34 +0000 (16:39 -0400)
committerTom Clegg <tom@tomclegg.ca>
Fri, 21 Aug 2020 20:39:34 +0000 (16:39 -0400)
refs #16314

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

37 files changed:
build/rails-package-scripts/README.md
build/rails-package-scripts/arvados-api-server.sh
build/rails-package-scripts/prerm.sh
build/run-build-docker-jobs-image.sh
build/run-build-packages-python-and-ruby.sh
build/run-tests.sh
doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
doc/sdk/python/cookbook.html.textile.liquid
go.mod
go.sum
lib/cloud/azure/azure.go
lib/cloud/azure/azure_test.go
lib/config/config.default.yml
lib/config/export.go
lib/config/generated_config.go
sdk/cli/arvados-cli.gemspec
sdk/go/arvados/config.go
sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_collection_test.go
sdk/go/arvados/fs_deferred.go
sdk/go/arvados/fs_lookup.go
sdk/go/arvados/fs_project.go
sdk/go/arvados/fs_project_test.go
sdk/go/arvados/fs_site.go
sdk/go/arvados/fs_site_test.go
services/api/.gitignore
services/keep-web/handler.go
services/keep-web/main.go
services/keep-web/s3.go [new file with mode: 0644]
services/keep-web/s3_test.go [new file with mode: 0644]
services/keep-web/server.go
services/keep-web/server_test.go
tools/arvbox/bin/arvbox
tools/compute-images/arvados-images-azure.json
tools/compute-images/build.sh

index 0d720bde147d58cf26600ff784b7a34f92effaea..2930957b942affd86326248e6e0e4a3efb3166f9 100644 (file)
@@ -12,7 +12,7 @@ Since our build process is a tower of shell scripts, concatenating files seemed
 
 postinst.sh lets the early parts define a few hooks to control behavior:
 
-* After it installs the core configuration files (database.yml, application.yml, and production.rb) to /etc/arvados/server, it calls setup_extra_conffiles.  By default this is a noop function (in step2.sh).  API server defines this to set up the old omniauth.rb conffile.
+* After it installs the core configuration files (database.yml, application.yml, and production.rb) to /etc/arvados/server, it calls setup_extra_conffiles.  By default this is a noop function (in step2.sh).
 * Before it restarts nginx, it calls setup_before_nginx_restart.  By default this is a noop function (in step2.sh).  API server defines this to set up the internal git repository, if necessary.
 * $RAILSPKG_DATABASE_LOAD_TASK defines the Rake task to load the database.  API server uses db:structure:load.  SSO server uses db:schema:load.  Workbench doesn't set this, which causes the postinst to skip all database work.
 * If $RAILSPKG_SUPPORTS_CONFIG_CHECK != 1, it won't run the config:check rake task.  SSO clears this flag (it doesn't have that task code).
index 82bc9898aa87c350b38774db6db349294330bc9f..027383ab4f62294aa9661b5e9ce1651457f3bf1a 100644 (file)
@@ -12,7 +12,9 @@ DOC_URL="http://doc.arvados.org/install/install-api-server.html#configure"
 
 RAILSPKG_DATABASE_LOAD_TASK=db:structure:load
 setup_extra_conffiles() {
-    setup_conffile initializers/omniauth.rb
+  # Rails 5.2 does not tolerate dangling symlinks in the initializers directory, and this one
+  # can still be there, left over from a previous version of the API server package.
+  rm -f $RELEASE_PATH/config/initializers/omniauth.rb
 }
 
 setup_before_nginx_restart() {
index 9816b14c6d5a975a1aafd51f7b47033196efd38c..6773b6f308080314fca9e6e4b7703567ab14a12a 100644 (file)
@@ -10,7 +10,6 @@ remove () {
   rm -f $RELEASE_PATH/config/environments/production.rb
   rm -f $RELEASE_PATH/config/application.yml
   # Old API server configuration file.
-  rm -f $RELEASE_PATH/config/initializers/omniauth.rb
   rm -rf $RELEASE_PATH/public/assets/
   rm -rf $RELEASE_PATH/tmp
   rm -rf $RELEASE_PATH/.bundle
index ec8357701d067fe0b17bdc2df01f17a1bf4f948e..d1fb2ac67054dfdc31ce8a31401747c3a55aefbf 100755 (executable)
@@ -185,28 +185,23 @@ if docker --version |grep " 1\.[0-9]\." ; then
     FORCE=-f
 fi
 
-#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig
-
 if ! [[ -z "$version_tag" ]]; then
     docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
-else
-    docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:latest
-fi
+    ECODE=$?
 
-ECODE=$?
+    if [[ "$ECODE" != "0" ]]; then
+        EXITCODE=$(($EXITCODE + $ECODE))
+    fi
 
-if [[ "$ECODE" != "0" ]]; then
-    EXITCODE=$(($EXITCODE + $ECODE))
+    checkexit $ECODE "docker tag"
+    title "docker tag complete (`timer`)"
 fi
 
-checkexit $ECODE "docker tag"
-title "docker tag complete (`timer`)"
-
 title "uploading images"
 
 timer_reset
 
-if [[ "$ECODE" != "0" ]]; then
+if [[ "$EXITCODE" != "0" ]]; then
     title "upload arvados images SKIPPED because build or tag failed"
 else
     if [[ $upload == true ]]; then
@@ -217,7 +212,6 @@ else
             docker_push arvados/jobs:"$version_tag"
         else
            docker_push arvados/jobs:$cwl_runner_version_orig
-           docker_push arvados/jobs:latest
         fi
         title "upload arvados images finished (`timer`)"
     else
index 66201b3b4d0b577b66b956730c67b5b2e20d3913..f3b7564d714f41492c8ff55933707a98c99086fb 100755 (executable)
@@ -192,7 +192,6 @@ PYTHON_BUILD_FAILURES=0
 if [ $PYTHON -eq 1 ]; then
   debug_echo "Building Python packages"
   python_wrapper arvados-python-client "$WORKSPACE/sdk/python"
-  python_wrapper arvados-pam "$WORKSPACE/sdk/pam"
   python_wrapper arvados-cwl-runner "$WORKSPACE/sdk/cwl"
   python_wrapper arvados_fuse "$WORKSPACE/services/fuse"
 
index 2742540b16b44efe57fa113d23d3e967915e5c2f..bedc95b2db28503d2505bc9ce840a06a89662496 100755 (executable)
@@ -195,7 +195,7 @@ sanity_checks() {
     ( [[ -n "$WORKSPACE" ]] && [[ -d "$WORKSPACE/services" ]] ) \
         || fatal "WORKSPACE environment variable not set to a source directory (see: $0 --help)"
     [[ -z "$CONFIGSRC" ]] || [[ -s "$CONFIGSRC/config.yml" ]] \
-       || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
+        || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
     echo Checking dependencies:
     echo "locale: ${LANG}"
     [[ "$(locale charmap)" = "UTF-8" ]] \
@@ -373,7 +373,7 @@ if [[ ${skip["sdk/R"]} == 1 && ${skip["doc"]} == 1 ]]; then
 fi
 
 if [[ $NEED_SDK_R == false ]]; then
-       echo "R SDK not needed, it will not be installed."
+        echo "R SDK not needed, it will not be installed."
 fi
 
 checkpidfile() {
@@ -414,11 +414,11 @@ start_services() {
     . "$VENVDIR/bin/activate"
     echo 'Starting API, controller, keepproxy, keep-web, arv-git-httpd, ws, and nginx ssl proxy...'
     if [[ ! -d "$WORKSPACE/services/api/log" ]]; then
-       mkdir -p "$WORKSPACE/services/api/log"
+        mkdir -p "$WORKSPACE/services/api/log"
     fi
     # Remove empty api.pid file if it exists
     if [[ -f "$WORKSPACE/tmp/api.pid" && ! -s "$WORKSPACE/tmp/api.pid" ]]; then
-       rm -f "$WORKSPACE/tmp/api.pid"
+        rm -f "$WORKSPACE/tmp/api.pid"
     fi
     all_services_stopped=
     fail=1
@@ -817,19 +817,19 @@ do_test_once() {
 
 check_arvados_config() {
     if [[ "$1" = "env" ]] ; then
-       return
+        return
     fi
     if [[ -z "$ARVADOS_CONFIG" ]] ; then
-       # Create config file.  The run_test_server script requires PyYAML,
-       # so virtualenv needs to be active.  Downstream steps like
-       # workbench install which require a valid config.yml.
-       if [[ ! -s "$VENVDIR/bin/activate" ]] ; then
-           install_env
-       fi
-       . "$VENVDIR/bin/activate"
+        # Create config file.  The run_test_server script requires PyYAML,
+        # so virtualenv needs to be active.  Downstream steps like
+        # workbench install which require a valid config.yml.
+        if [[ ! -s "$VENVDIR/bin/activate" ]] ; then
+            install_env
+        fi
+        . "$VENVDIR/bin/activate"
         cd "$WORKSPACE"
-       eval $(python sdk/python/tests/run_test_server.py setup_config)
-       deactivate
+        eval $(python sdk/python/tests/run_test_server.py setup_config)
+        deactivate
     fi
 }
 
index 23da428b395a994729ab02e5dcacb6cb1e3f3d2f..cdecc88152e38f1e34e2a1ebdbc26e6271174a96 100644 (file)
@@ -92,8 +92,6 @@ Options:
       Azure secrets file which will be sourced from this script
   --azure-resource-group (default: false, required if building for Azure)
       Azure resource group
-  --azure-storage-account (default: false, required if building for Azure)
-      Azure storage account
   --azure-location (default: false, required if building for Azure)
       Azure location, e.g. centralus, eastus, westeurope
   --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
@@ -117,7 +115,6 @@ h2(#azure). Build an Azure image
 <notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
            --arvados-cluster-id ClusterID \
            --azure-resource-group ResourceGroup \
-           --azure-storage-account StorageAccount \
            --azure-location AzureRegion \
            --azure-sku AzureSKU \
            --azure-secrets-file AzureSecretsFilePath \
@@ -126,7 +123,7 @@ h2(#azure). Build an Azure image
 </span>
 </code></pre></notextile>
 
-For @ClusterID@, fill in your cluster ID. The @ResourceGroup@, @StorageAccount@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
+For @ClusterID@, fill in your cluster ID. The @ResourceGroup@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
 
 @AzureSecretsFilePath@ should be replaced with the path to a shell script that loads the Azure secrets with sufficient permissions to create the image. The file would look like this:
 
index faa7c5b953fcf6febf3b32080914c392d27a5a7e..68417784701ce387e7437bb0f0b8e62a2335e5ff 100644 (file)
@@ -93,6 +93,77 @@ h4. Minimal configuration example for Amazon EC2
 
 h4. Minimal configuration example for Azure
 
+Using managed disks:
+
+<notextile>
+<pre><code>    Containers:
+      CloudVMs:
+        ImageID: "zzzzz-compute-v1597349873"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored, only needs to
+          # be specified if it is different from ResourceGroup
+          ImageResourceGroup: aaaaa
+
+</code></pre>
+</notextile>
+
+Azure recommends using managed images. If you plan to start more than 20 VMs simultaneously, Azure recommends using a shared image gallery instead to avoid slowdowns and timeouts during the creation of the VMs.
+
+Using an image from a shared image gallery:
+
+<notextile>
+<pre><code>    Containers:
+      CloudVMs:
+        ImageID: "shared_image_gallery_image_definition_name"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored, only needs to
+          # be specified if it is different from ResourceGroup
+          ImageResourceGroup: aaaaa
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: "shared_image_gallery_1"
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: "0.0.1"
+
+</code></pre>
+</notextile>
+
+Using unmanaged disks (deprecated):
+
 <notextile>
 <pre><code>    Containers:
       CloudVMs:
index bd7f64b33d068175b3219f1c962f671777fd3173..75c51ee5a8126c57b9b23bc95d9cffdcf7fc027c 100644 (file)
@@ -81,7 +81,7 @@ def get_cr_state(cr_uuid):
             return 'On hold'
         else:
             return 'Queued'
-    elif c['state'] == 'Complete' and c['exit_code'] != 0
+    elif c['state'] == 'Complete' and c['exit_code'] != 0:
         return 'Failed'
     elif c['state'] == 'Running':
         if c['runtime_status'].get('error', None):
@@ -144,7 +144,7 @@ child_requests = api.container_requests().list(filters=[
 child_containers = {c["container_uuid"]: c for c in child_requests["items"]}
 cancelled_child_containers = api.containers().list(filters=[
     ["exit_code", "!=", "0"],
-    ["uuid", "in", child_containers.keys()]], limit=1000).execute()
+    ["uuid", "in", list(child_containers.keys())]], limit=1000).execute()
 for c in cancelled_child_containers["items"]:
     print("%s (%s)" % (child_containers[c["uuid"]]["name"], child_containers[c["uuid"]]["uuid"]))
 {% endcodeblock %}
@@ -159,7 +159,8 @@ container_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
 container_request = api.container_requests().get(uuid=container_request_uuid).execute()
 collection = arvados.collection.CollectionReader(container_request["log_uuid"])
 for c in collection:
-    print(collection.open(c).read())
+    if isinstance(collection.find(c), arvados.arvfile.ArvadosFile):
+        print(collection.open(c).read())
 {% endcodeblock %}
 
 h2(#sharing_link). Create a collection sharing link
diff --git a/go.mod b/go.mod
index 71052882adbeff703ae81a21900561afe15c8743..262978d9125d412b32bfee22508bcfe517de8ec6 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -4,8 +4,12 @@ go 1.13
 
 require (
        github.com/AdRoll/goamz v0.0.0-20170825154802-2731d20f46f4
-       github.com/Azure/azure-sdk-for-go v19.1.0+incompatible
-       github.com/Azure/go-autorest v10.15.2+incompatible
+       github.com/Azure/azure-sdk-for-go v45.1.0+incompatible
+       github.com/Azure/go-autorest v14.2.0+incompatible
+       github.com/Azure/go-autorest/autorest v0.11.3
+       github.com/Azure/go-autorest/autorest/azure/auth v0.5.1
+       github.com/Azure/go-autorest/autorest/to v0.4.0
+       github.com/Azure/go-autorest/autorest/validation v0.3.0 // indirect
        github.com/Microsoft/go-winio v0.4.5 // indirect
        github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 // indirect
        github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect
@@ -16,8 +20,6 @@ require (
        github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092
        github.com/coreos/go-oidc v2.1.0+incompatible
        github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7
-       github.com/dgrijalva/jwt-go v3.1.0+incompatible // indirect
-       github.com/dimchansky/utfbom v1.0.0 // indirect
        github.com/dnaeon/go-vcr v1.0.1 // indirect
        github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible // indirect
        github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1
@@ -44,7 +46,6 @@ require (
        github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect
        github.com/lib/pq v1.3.0
        github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c // indirect
-       github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 // indirect
        github.com/msteinert/pam v0.0.0-20190215180659-f29b9f28d6f9
        github.com/opencontainers/go-digest v1.0.0-rc1 // indirect
        github.com/opencontainers/image-spec v1.0.1-0.20171125024018-577479e4dc27 // indirect
@@ -57,9 +58,8 @@ require (
        github.com/sergi/go-diff v1.0.0 // indirect
        github.com/sirupsen/logrus v1.4.2
        github.com/src-d/gcfg v1.3.0 // indirect
-       github.com/stretchr/testify v1.4.0 // indirect
        github.com/xanzy/ssh-agent v0.1.0 // indirect
-       golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550
+       golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
        golang.org/x/net v0.0.0-20200202094626-16171245cfb2
        golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
        golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd
diff --git a/go.sum b/go.sum
index ac5c03fc83726ca016a177a31087829eae395749..85d205112fb95ecf1895d96122686e0e2e2a849b 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -2,10 +2,40 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
 cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.38.0 h1:ROfEUZz+Gh5pa62DJWXSaonyu3StP6EA6lPEXPI6mCo=
 cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
+github.com/Azure/azure-sdk-for-go v0.2.0-beta h1:wYBqYNMWr0WL2lcEZi+dlK9n+N0wJ0Pjs4BKeOnDjfQ=
 github.com/Azure/azure-sdk-for-go v19.1.0+incompatible h1:ysqLW+tqZjJWOTE74heH/pDRbr4vlN3yV+dqQYgpyxw=
 github.com/Azure/azure-sdk-for-go v19.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v20.2.0+incompatible h1:La3ODnagAOf5ZFUepTfVftvNTdxkq06DNpgi1l0yaM0=
+github.com/Azure/azure-sdk-for-go v20.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v45.1.0+incompatible h1:kxtaPD8n2z5Za+9e3sKsYG2IX6PG2R6VXtgS7gAbh3A=
+github.com/Azure/azure-sdk-for-go v45.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/go-autorest v1.1.1 h1:4G9tVCqooRY3vDTB2bA1Z01PlSALtnUbji0AfzthUSs=
 github.com/Azure/go-autorest v10.15.2+incompatible h1:oZpnRzZie83xGV5txbT1aa/7zpCPvURGhV6ThJij2bs=
 github.com/Azure/go-autorest v10.15.2+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
+github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
+github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
+github.com/Azure/go-autorest/autorest v0.11.0/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
+github.com/Azure/go-autorest/autorest v0.11.3 h1:fyYnmYujkIXUgv88D9/Wo2ybE4Zwd/TmQd5sSI5u2Ws=
+github.com/Azure/go-autorest/autorest v0.11.3/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
+github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg=
+github.com/Azure/go-autorest/autorest/adal v0.9.2 h1:Aze/GQeAN1RRbGmnUJvUj+tFGBzFdIg3293/A9rbxC4=
+github.com/Azure/go-autorest/autorest/adal v0.9.2/go.mod h1:/3SMAM86bP6wC9Ev35peQDUeqFZBMH07vvUOmg4z/fE=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.1 h1:bvUhZciHydpBxBmCheUgxxbSwJy7xcfjkUsjUcqSojc=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.1/go.mod h1:ea90/jvmnAwDrSooLH4sRIehEPtG/EPUXavDh31MnA4=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.0 h1:Ml+UCrnlKD+cJmSzrZ/RDcDw86NjkRUpnFh7V5JUhzU=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.0/go.mod h1:JljT387FplPzBA31vUcvsetLKF3pec5bdAxjVU4kI2s=
+github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw=
+github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=
+github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/to v0.4.0 h1:oXVqrxakqqV1UZdSazDOPOLvOIz+XA683u8EctwboHk=
+github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE=
+github.com/Azure/go-autorest/autorest/validation v0.3.0 h1:3I9AAI63HfcLtphd9g39ruUwRI+Ca+z/f36KHPFRUss=
+github.com/Azure/go-autorest/autorest/validation v0.3.0/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E=
+github.com/Azure/go-autorest/logger v0.2.0 h1:e4RVHVZKC5p6UANLJHkM4OfR1UKZPj8Wt8Pcx+3oqrE=
+github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
+github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo=
+github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/Microsoft/go-winio v0.4.5 h1:U2XsGR5dBg1yzwSEJoP2dE2/aAXpmad+CNG2hE9Pd5k=
 github.com/Microsoft/go-winio v0.4.5/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
@@ -48,8 +78,12 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgrijalva/jwt-go v3.1.0+incompatible h1:FFziAwDQQ2dz1XClWMkwvukur3evtZx7x/wMHKM1i20=
 github.com/dgrijalva/jwt-go v3.1.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dimchansky/utfbom v1.0.0 h1:fGC2kkf4qOoKqZ4q7iIh+Vef4ubC1c38UDsEyZynZPc=
 github.com/dimchansky/utfbom v1.0.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
+github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TRo4=
+github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
 github.com/dnaeon/go-vcr v1.0.1 h1:r8L/HqC0Hje5AXMu1ooW8oyQyOFv4GxqpL0nRP7SLLY=
 github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E=
 github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible h1:PVtvnmmxSMUcT5AY6vG7sCCzRg3eyoW6vQvXtITC60c=
@@ -144,6 +178,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0j
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 h1:eQox4Rh4ewJF+mqYPxCkmBAirRnPaHEB26UkNuPyjlk=
 github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
+github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
+github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
@@ -207,6 +243,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90Pveol
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
index 6de367aa251c4c034b77331befde540782dc89d5..c26309aca5a95fc7d35495295d33f79c72c74cdf 100644 (file)
@@ -8,6 +8,7 @@ import (
        "context"
        "encoding/base64"
        "encoding/json"
+       "errors"
        "fmt"
        "net/http"
        "regexp"
@@ -18,7 +19,7 @@ import (
 
        "git.arvados.org/arvados.git/lib/cloud"
        "git.arvados.org/arvados.git/sdk/go/arvados"
-       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
+       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
        "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
        storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
        "github.com/Azure/azure-sdk-for-go/storage"
@@ -35,20 +36,23 @@ import (
 var Driver = cloud.DriverFunc(newAzureInstanceSet)
 
 type azureInstanceSetConfig struct {
-       SubscriptionID               string
-       ClientID                     string
-       ClientSecret                 string
-       TenantID                     string
-       CloudEnvironment             string
-       ResourceGroup                string
-       Location                     string
-       Network                      string
-       NetworkResourceGroup         string
-       Subnet                       string
-       StorageAccount               string
-       BlobContainer                string
-       DeleteDanglingResourcesAfter arvados.Duration
-       AdminUsername                string
+       SubscriptionID                 string
+       ClientID                       string
+       ClientSecret                   string
+       TenantID                       string
+       CloudEnvironment               string
+       ResourceGroup                  string
+       ImageResourceGroup             string
+       Location                       string
+       Network                        string
+       NetworkResourceGroup           string
+       Subnet                         string
+       StorageAccount                 string
+       BlobContainer                  string
+       SharedImageGalleryName         string
+       SharedImageGalleryImageVersion string
+       DeleteDanglingResourcesAfter   arvados.Duration
+       AdminUsername                  string
 }
 
 type containerWrapper interface {
@@ -138,6 +142,25 @@ func (cl *interfacesClientImpl) listComplete(ctx context.Context, resourceGroupN
        return r, wrapAzureError(err)
 }
 
+type disksClientWrapper interface {
+       listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error)
+       delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error)
+}
+
+type disksClientImpl struct {
+       inner compute.DisksClient
+}
+
+func (cl *disksClientImpl) listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error) {
+       r, err := cl.inner.ListByResourceGroup(ctx, resourceGroupName)
+       return r, wrapAzureError(err)
+}
+
+func (cl *disksClientImpl) delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error) {
+       r, err := cl.inner.Delete(ctx, resourceGroupName, diskName)
+       return r, wrapAzureError(err)
+}
+
 var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`)
 
 type azureRateLimitError struct {
@@ -196,20 +219,23 @@ func wrapAzureError(err error) error {
 }
 
 type azureInstanceSet struct {
-       azconfig     azureInstanceSetConfig
-       vmClient     virtualMachinesClientWrapper
-       netClient    interfacesClientWrapper
-       blobcont     containerWrapper
-       azureEnv     azure.Environment
-       interfaces   map[string]network.Interface
-       dispatcherID string
-       namePrefix   string
-       ctx          context.Context
-       stopFunc     context.CancelFunc
-       stopWg       sync.WaitGroup
-       deleteNIC    chan string
-       deleteBlob   chan storage.Blob
-       logger       logrus.FieldLogger
+       azconfig           azureInstanceSetConfig
+       vmClient           virtualMachinesClientWrapper
+       netClient          interfacesClientWrapper
+       disksClient        disksClientWrapper
+       imageResourceGroup string
+       blobcont           containerWrapper
+       azureEnv           azure.Environment
+       interfaces         map[string]network.Interface
+       dispatcherID       string
+       namePrefix         string
+       ctx                context.Context
+       stopFunc           context.CancelFunc
+       stopWg             sync.WaitGroup
+       deleteNIC          chan string
+       deleteBlob         chan storage.Blob
+       deleteDisk         chan compute.Disk
+       logger             logrus.FieldLogger
 }
 
 func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
@@ -233,6 +259,7 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
        az.azconfig = azcfg
        vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID)
        netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID)
+       disksClient := compute.NewDisksClient(az.azconfig.SubscriptionID)
        storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID)
 
        az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnvironment)
@@ -253,26 +280,38 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
 
        vmClient.Authorizer = authorizer
        netClient.Authorizer = authorizer
+       disksClient.Authorizer = authorizer
        storageAcctClient.Authorizer = authorizer
 
        az.vmClient = &virtualMachinesClientImpl{vmClient}
        az.netClient = &interfacesClientImpl{netClient}
+       az.disksClient = &disksClientImpl{disksClient}
 
-       result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't get account keys")
-               return err
+       az.imageResourceGroup = az.azconfig.ImageResourceGroup
+       if az.imageResourceGroup == "" {
+               az.imageResourceGroup = az.azconfig.ResourceGroup
        }
 
-       key1 := *(*result.Keys)[0].Value
-       client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't make client")
-               return err
-       }
+       var client storage.Client
+       if az.azconfig.StorageAccount != "" && az.azconfig.BlobContainer != "" {
+               result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
+               if err != nil {
+                       az.logger.WithError(err).Warn("Couldn't get account keys")
+                       return err
+               }
+
+               key1 := *(*result.Keys)[0].Value
+               client, err = storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
+               if err != nil {
+                       az.logger.WithError(err).Warn("Couldn't make client")
+                       return err
+               }
 
-       blobsvc := client.GetBlobService()
-       az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
+               blobsvc := client.GetBlobService()
+               az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
+       } else if az.azconfig.StorageAccount != "" || az.azconfig.BlobContainer != "" {
+               az.logger.Error("Invalid configuration: StorageAccount and BlobContainer must both be empty or both be set")
+       }
 
        az.dispatcherID = dispatcherID
        az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID)
@@ -288,21 +327,21 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
                                tk.Stop()
                                return
                        case <-tk.C:
-                               az.manageBlobs()
+                               if az.blobcont != nil {
+                                       az.manageBlobs()
+                               }
+                               az.manageDisks()
                        }
                }
        }()
 
        az.deleteNIC = make(chan string)
        az.deleteBlob = make(chan storage.Blob)
+       az.deleteDisk = make(chan compute.Disk)
 
        for i := 0; i < 4; i++ {
                go func() {
-                       for {
-                               nicname, ok := <-az.deleteNIC
-                               if !ok {
-                                       return
-                               }
+                       for nicname := range az.deleteNIC {
                                _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, nicname)
                                if delerr != nil {
                                        az.logger.WithError(delerr).Warnf("Error deleting %v", nicname)
@@ -312,11 +351,7 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
                        }
                }()
                go func() {
-                       for {
-                               blob, ok := <-az.deleteBlob
-                               if !ok {
-                                       return
-                               }
+                       for blob := range az.deleteBlob {
                                err := blob.Delete(nil)
                                if err != nil {
                                        az.logger.WithError(err).Warnf("Error deleting %v", blob.Name)
@@ -325,11 +360,28 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
                                }
                        }
                }()
+               go func() {
+                       for disk := range az.deleteDisk {
+                               _, err := az.disksClient.delete(az.ctx, az.imageResourceGroup, *disk.Name)
+                               if err != nil {
+                                       az.logger.WithError(err).Warnf("Error deleting disk %+v", *disk.Name)
+                               } else {
+                                       az.logger.Printf("Deleted disk %v", *disk.Name)
+                               }
+                       }
+               }()
        }
 
        return nil
 }
 
+func (az *azureInstanceSet) cleanupNic(nic network.Interface) {
+       _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
+       if delerr != nil {
+               az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
+       }
+}
+
 func (az *azureInstanceSet) Create(
        instanceType arvados.InstanceType,
        imageID cloud.ImageID,
@@ -389,14 +441,55 @@ func (az *azureInstanceSet) Create(
                return nil, wrapAzureError(err)
        }
 
-       blobname := fmt.Sprintf("%s-os.vhd", name)
-       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
-               az.azconfig.StorageAccount,
-               az.azureEnv.StorageEndpointSuffix,
-               az.azconfig.BlobContainer,
-               blobname)
-
+       var blobname string
        customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))
+       var storageProfile *compute.StorageProfile
+
+       re := regexp.MustCompile(`^http(s?)://`)
+       if re.MatchString(string(imageID)) {
+               if az.blobcont == nil {
+                       az.cleanupNic(nic)
+                       return nil, wrapAzureError(errors.New("Invalid configuration: can't configure unmanaged image URL without StorageAccount and BlobContainer"))
+               }
+               blobname = fmt.Sprintf("%s-os.vhd", name)
+               instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
+                       az.azconfig.StorageAccount,
+                       az.azureEnv.StorageEndpointSuffix,
+                       az.azconfig.BlobContainer,
+                       blobname)
+               az.logger.Warn("using deprecated unmanaged image, see https://doc.arvados.org/ to migrate to managed disks")
+               storageProfile = &compute.StorageProfile{
+                       OsDisk: &compute.OSDisk{
+                               OsType:       compute.Linux,
+                               Name:         to.StringPtr(name + "-os"),
+                               CreateOption: compute.DiskCreateOptionTypesFromImage,
+                               Image: &compute.VirtualHardDisk{
+                                       URI: to.StringPtr(string(imageID)),
+                               },
+                               Vhd: &compute.VirtualHardDisk{
+                                       URI: &instanceVhd,
+                               },
+                       },
+               }
+       } else {
+               id := to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/images/" + string(imageID))
+               if az.azconfig.SharedImageGalleryName != "" && az.azconfig.SharedImageGalleryImageVersion != "" {
+                       id = to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/galleries/" + az.azconfig.SharedImageGalleryName + "/images/" + string(imageID) + "/versions/" + az.azconfig.SharedImageGalleryImageVersion)
+               } else if az.azconfig.SharedImageGalleryName != "" || az.azconfig.SharedImageGalleryImageVersion != "" {
+                       az.cleanupNic(nic)
+                       return nil, wrapAzureError(errors.New("Invalid configuration: SharedImageGalleryName and SharedImageGalleryImageVersion must both be set or both be empty"))
+               }
+               storageProfile = &compute.StorageProfile{
+                       ImageReference: &compute.ImageReference{
+                               ID: id,
+                       },
+                       OsDisk: &compute.OSDisk{
+                               OsType:       compute.Linux,
+                               Name:         to.StringPtr(name + "-os"),
+                               CreateOption: compute.DiskCreateOptionTypesFromImage,
+                       },
+               }
+       }
 
        vmParameters := compute.VirtualMachine{
                Location: &az.azconfig.Location,
@@ -405,19 +498,7 @@ func (az *azureInstanceSet) Create(
                        HardwareProfile: &compute.HardwareProfile{
                                VMSize: compute.VirtualMachineSizeTypes(instanceType.ProviderType),
                        },
-                       StorageProfile: &compute.StorageProfile{
-                               OsDisk: &compute.OSDisk{
-                                       OsType:       compute.Linux,
-                                       Name:         to.StringPtr(name + "-os"),
-                                       CreateOption: compute.FromImage,
-                                       Image: &compute.VirtualHardDisk{
-                                               URI: to.StringPtr(string(imageID)),
-                                       },
-                                       Vhd: &compute.VirtualHardDisk{
-                                               URI: &instanceVhd,
-                                       },
-                               },
-                       },
+                       StorageProfile: storageProfile,
                        NetworkProfile: &compute.NetworkProfile{
                                NetworkInterfaces: &[]compute.NetworkInterfaceReference{
                                        compute.NetworkInterfaceReference{
@@ -449,15 +530,21 @@ func (az *azureInstanceSet) Create(
 
        vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
        if err != nil {
-               _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
-               if delerr != nil {
-                       az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+               // Do some cleanup. Otherwise, an unbounded number of new unused nics and
+               // blobs can pile up during times when VMs can't be created and the
+               // dispatcher keeps retrying, because the garbage collection in manageBlobs
+               // and manageNics is only triggered periodically. This is most important
+               // for nics, because those are subject to a quota.
+               az.cleanupNic(nic)
+
+               if blobname != "" {
+                       _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
+                       if delerr != nil {
+                               az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+                       }
                }
 
-               _, delerr = az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
-               if delerr != nil {
-                       az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
-               }
+               // Leave cleaning up of managed disks to the garbage collection in manageDisks()
 
                return nil, wrapAzureError(err)
        }
@@ -497,7 +584,7 @@ func (az *azureInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, err
        return instances, nil
 }
 
-// ManageNics returns a list of Azure network interface resources.
+// manageNics returns a list of Azure network interface resources.
 // Also performs garbage collection of NICs which have "namePrefix",
 // are not associated with a virtual machine and have a "created-at"
 // time more than DeleteDanglingResourcesAfter (to prevent racing and
@@ -538,7 +625,7 @@ func (az *azureInstanceSet) manageNics() (map[string]network.Interface, error) {
        return interfaces, nil
 }
 
-// ManageBlobs garbage collects blobs (VM disk images) in the
+// manageBlobs garbage collects blobs (VM disk images) in the
 // configured storage account container.  It will delete blobs which
 // have "namePrefix", are "available" (which means they are not
 // leased to a VM) and haven't been modified for
@@ -573,11 +660,41 @@ func (az *azureInstanceSet) manageBlobs() {
        }
 }
 
+// manageDisks garbage collects managed compute disks (VM disk images) in the
+// configured resource group.  It will delete disks which have "namePrefix" and an "-os" suffix,
+// are "unattached" (which means they are not leased to a VM) and were created
+// more than DeleteDanglingResourcesAfter ago.  (Azure provides no
+// modification timestamp on managed disks, there is only a creation timestamp)
+func (az *azureInstanceSet) manageDisks() {
+
+       re := regexp.MustCompile(`^` + regexp.QuoteMeta(az.namePrefix) + `.*-os$`)
+       threshold := time.Now().Add(-az.azconfig.DeleteDanglingResourcesAfter.Duration())
+
+       response, err := az.disksClient.listByResourceGroup(az.ctx, az.imageResourceGroup)
+       if err != nil {
+               az.logger.WithError(err).Warn("Error listing disks")
+               return
+       }
+
+       for ; response.NotDone(); err = response.Next() {
+               for _, d := range response.Values() {
+                       if d.DiskProperties.DiskState == compute.Unattached &&
+                               d.Name != nil && re.MatchString(*d.Name) &&
+                               d.DiskProperties.TimeCreated.ToTime().Before(threshold) {
+
+                               az.logger.Printf("Disk %v is unlocked and was created at %+v, will delete", *d.Name, d.DiskProperties.TimeCreated.ToTime())
+                               az.deleteDisk <- d
+                       }
+               }
+       }
+}
+
 func (az *azureInstanceSet) Stop() {
        az.stopFunc()
        az.stopWg.Wait()
        close(az.deleteNIC)
        close(az.deleteBlob)
+       close(az.deleteDisk)
 }
 
 type azureInstance struct {
index 94af0b9a26dc8c7587b0d5e87bba013216c3f266..7b5a34df59798b781222cf52131fee0d1e7eade0 100644 (file)
@@ -47,7 +47,7 @@ import (
        "git.arvados.org/arvados.git/lib/dispatchcloud/test"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/config"
-       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
+       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
        "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
        "github.com/Azure/azure-sdk-for-go/storage"
        "github.com/Azure/go-autorest/autorest"
@@ -156,6 +156,7 @@ func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error)
                logger:       logrus.StandardLogger(),
                deleteNIC:    make(chan string),
                deleteBlob:   make(chan storage.Blob),
+               deleteDisk:   make(chan compute.Disk),
        }
        ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
        ap.vmClient = &VirtualMachinesClientStub{}
index cac0ac61d70fe6c3e86458bd11c13ee799d15dd3..8b6c1ce56ba20704411a3bac792c9b17c29e99bd 100644 (file)
@@ -483,6 +483,9 @@ Clusters:
       # Use of this feature is not recommended, if it can be avoided.
       ForwardSlashNameSubstitution: ""
 
+      # Include "folder objects" in S3 ListObjects responses.
+      S3FolderObjects: true
+
       # Managed collection properties. At creation time, if the client didn't
       # provide the listed keys, they will be automatically populated following
       # one of the following behaviors:
@@ -959,6 +962,12 @@ Clusters:
         TimeoutShutdown: 10s
 
         # Worker VM image ID.
+        # (aws) AMI identifier
+        # (azure) managed disks: the name of the managed disk image
+        # (azure) shared image gallery: the name of the image definition. Also
+        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
+        # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
+        # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
 
         # An executable file (located on the dispatcher host) to be
@@ -1027,7 +1036,16 @@ Clusters:
           Network: ""
           Subnet: ""
 
-          # (azure) Where to store the VM VHD blobs
+          # (azure) managed disks: The resource group where the managed disk
+          # image can be found (if different from ResourceGroup).
+          ImageResourceGroup: ""
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: ""
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: ""
+
+          # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
           StorageAccount: ""
           BlobContainer: ""
 
index 251415f711360b5a61e5109aeadad60347a5d066..5cdc3dae6a11b3dd5464743aee7213a2bd5d21d6 100644 (file)
@@ -59,10 +59,10 @@ func ExportJSON(w io.Writer, cluster *arvados.Cluster) error {
 // exists.
 var whitelist = map[string]bool{
        // | sort -t'"' -k2,2
-       "ClusterID":                                    true,
        "API":                                          true,
        "API.AsyncPermissionsUpdateInterval":           false,
        "API.DisabledAPIs":                             false,
+       "API.KeepServiceRequestTimeout":                false,
        "API.MaxConcurrentRequests":                    false,
        "API.MaxIndexDatabaseRead":                     false,
        "API.MaxItemsPerResponse":                      true,
@@ -71,24 +71,29 @@ var whitelist = map[string]bool{
        "API.MaxRequestSize":                           true,
        "API.RailsSessionSecretToken":                  false,
        "API.RequestTimeout":                           true,
-       "API.WebsocketClientEventQueue":                false,
        "API.SendTimeout":                              true,
+       "API.WebsocketClientEventQueue":                false,
        "API.WebsocketServerEventQueue":                false,
-       "API.KeepServiceRequestTimeout":                false,
        "AuditLogs":                                    false,
        "AuditLogs.MaxAge":                             false,
        "AuditLogs.MaxDeleteBatch":                     false,
        "AuditLogs.UnloggedAttributes":                 false,
+       "ClusterID":                                    true,
        "Collections":                                  true,
+       "Collections.BalanceCollectionBatch":           false,
+       "Collections.BalanceCollectionBuffers":         false,
+       "Collections.BalancePeriod":                    false,
+       "Collections.BalanceTimeout":                   false,
+       "Collections.BlobDeleteConcurrency":            false,
+       "Collections.BlobMissingReport":                false,
+       "Collections.BlobReplicateConcurrency":         false,
        "Collections.BlobSigning":                      true,
        "Collections.BlobSigningKey":                   false,
        "Collections.BlobSigningTTL":                   true,
        "Collections.BlobTrash":                        false,
-       "Collections.BlobTrashLifetime":                false,
-       "Collections.BlobTrashConcurrency":             false,
        "Collections.BlobTrashCheckInterval":           false,
-       "Collections.BlobDeleteConcurrency":            false,
-       "Collections.BlobReplicateConcurrency":         false,
+       "Collections.BlobTrashConcurrency":             false,
+       "Collections.BlobTrashLifetime":                false,
        "Collections.CollectionVersioning":             false,
        "Collections.DefaultReplication":               true,
        "Collections.DefaultTrashLifetime":             true,
@@ -97,18 +102,14 @@ var whitelist = map[string]bool{
        "Collections.ManagedProperties.*":              true,
        "Collections.ManagedProperties.*.*":            true,
        "Collections.PreserveVersionIfIdle":            true,
+       "Collections.S3FolderObjects":                  true,
        "Collections.TrashSweepInterval":               false,
        "Collections.TrustAllContent":                  false,
        "Collections.WebDAVCache":                      false,
-       "Collections.BalanceCollectionBatch":           false,
-       "Collections.BalancePeriod":                    false,
-       "Collections.BalanceTimeout":                   false,
-       "Collections.BlobMissingReport":                false,
-       "Collections.BalanceCollectionBuffers":         false,
        "Containers":                                   true,
        "Containers.CloudVMs":                          false,
-       "Containers.CrunchRunCommand":                  false,
        "Containers.CrunchRunArgumentsList":            false,
+       "Containers.CrunchRunCommand":                  false,
        "Containers.DefaultKeepCacheRAM":               true,
        "Containers.DispatchPrivateKey":                false,
        "Containers.JobsAPI":                           true,
@@ -155,31 +156,31 @@ var whitelist = map[string]bool{
        "Login.OpenIDConnect":                          true,
        "Login.OpenIDConnect.ClientID":                 false,
        "Login.OpenIDConnect.ClientSecret":             false,
-       "Login.OpenIDConnect.Enable":                   true,
-       "Login.OpenIDConnect.Issuer":                   false,
        "Login.OpenIDConnect.EmailClaim":               false,
        "Login.OpenIDConnect.EmailVerifiedClaim":       false,
+       "Login.OpenIDConnect.Enable":                   true,
+       "Login.OpenIDConnect.Issuer":                   false,
        "Login.OpenIDConnect.UsernameClaim":            false,
        "Login.PAM":                                    true,
        "Login.PAM.DefaultEmailDomain":                 false,
        "Login.PAM.Enable":                             true,
        "Login.PAM.Service":                            false,
+       "Login.RemoteTokenRefresh":                     true,
        "Login.SSO":                                    true,
        "Login.SSO.Enable":                             true,
        "Login.SSO.ProviderAppID":                      false,
        "Login.SSO.ProviderAppSecret":                  false,
-       "Login.RemoteTokenRefresh":                     true,
        "Login.Test":                                   true,
        "Login.Test.Enable":                            true,
        "Login.Test.Users":                             false,
        "Mail":                                         true,
+       "Mail.EmailFrom":                               false,
+       "Mail.IssueReporterEmailFrom":                  false,
+       "Mail.IssueReporterEmailTo":                    false,
        "Mail.MailchimpAPIKey":                         false,
        "Mail.MailchimpListID":                         false,
        "Mail.SendUserSetupNotificationEmail":          false,
-       "Mail.IssueReporterEmailFrom":                  false,
-       "Mail.IssueReporterEmailTo":                    false,
        "Mail.SupportEmailAddress":                     true,
-       "Mail.EmailFrom":                               false,
        "ManagementToken":                              false,
        "PostgreSQL":                                   false,
        "RemoteClusters":                               true,
@@ -197,8 +198,8 @@ var whitelist = map[string]bool{
        "SystemRootToken":                              false,
        "TLS":                                          false,
        "Users":                                        true,
-       "Users.AnonymousUserToken":                     true,
        "Users.AdminNotifierEmailFrom":                 false,
+       "Users.AnonymousUserToken":                     true,
        "Users.AutoAdminFirstUser":                     false,
        "Users.AutoAdminUserWithEmail":                 false,
        "Users.AutoSetupNewUsers":                      false,
@@ -235,6 +236,7 @@ var whitelist = map[string]bool{
        "Workbench.EnableGettingStartedPopup":          true,
        "Workbench.EnablePublicProjectsPage":           true,
        "Workbench.FileViewersConfigURL":               true,
+       "Workbench.InactivePageHTML":                   true,
        "Workbench.LogViewerMaxBytes":                  true,
        "Workbench.MultiSiteSearch":                    true,
        "Workbench.ProfilingEnabled":                   true,
@@ -246,6 +248,8 @@ var whitelist = map[string]bool{
        "Workbench.ShowUserAgreementInline":            true,
        "Workbench.ShowUserNotifications":              true,
        "Workbench.SiteName":                           true,
+       "Workbench.SSHHelpHostSuffix":                  true,
+       "Workbench.SSHHelpPageHTML":                    true,
        "Workbench.Theme":                              true,
        "Workbench.UserProfileFormFields":              true,
        "Workbench.UserProfileFormFields.*":            true,
@@ -254,9 +258,6 @@ var whitelist = map[string]bool{
        "Workbench.UserProfileFormMessage":             true,
        "Workbench.VocabularyURL":                      true,
        "Workbench.WelcomePageHTML":                    true,
-       "Workbench.InactivePageHTML":                   true,
-       "Workbench.SSHHelpPageHTML":                    true,
-       "Workbench.SSHHelpHostSuffix":                  true,
 }
 
 func redactUnsafe(m map[string]interface{}, mPrefix, lookupPrefix string) error {
index e3bd13fd2686254381005edeb7da6e10e813843f..74879d23999a68ab1fb0406f13c5d067955711e8 100644 (file)
@@ -489,6 +489,9 @@ Clusters:
       # Use of this feature is not recommended, if it can be avoided.
       ForwardSlashNameSubstitution: ""
 
+      # Include "folder objects" in S3 ListObjects responses.
+      S3FolderObjects: true
+
       # Managed collection properties. At creation time, if the client didn't
       # provide the listed keys, they will be automatically populated following
       # one of the following behaviors:
@@ -965,6 +968,12 @@ Clusters:
         TimeoutShutdown: 10s
 
         # Worker VM image ID.
+        # (aws) AMI identifier
+        # (azure) managed disks: the name of the managed disk image
+        # (azure) shared image gallery: the name of the image definition. Also
+        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
+        # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
+        # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
 
         # An executable file (located on the dispatcher host) to be
@@ -1033,7 +1042,16 @@ Clusters:
           Network: ""
           Subnet: ""
 
-          # (azure) Where to store the VM VHD blobs
+          # (azure) managed disks: The resource group where the managed disk
+          # image can be found (if different from ResourceGroup).
+          ImageResourceGroup: ""
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: ""
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: ""
+
+          # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
           StorageAccount: ""
           BlobContainer: ""
 
index f60adf5385ce7489ccaace30423847033698f579..4096a2eb156b39bc26a94a428342dbd77815f56a 100644 (file)
@@ -46,7 +46,8 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'json', '>= 1.7.7', '<3'
   s.add_runtime_dependency 'optimist', '~> 3.0'
   s.add_runtime_dependency 'andand', '~> 1.3', '>= 1.3.3'
-  s.add_runtime_dependency 'oj', '~> 3.0'
+  # oj 3.10.9 requires ruby >= 2.4 and arvbox doesn't currently have it because of SSO
+  s.add_runtime_dependency 'oj', '< 3.10.9'
   s.add_runtime_dependency 'curb', '~> 0.8'
   s.add_runtime_dependency 'launchy', '< 2.5'
   # arvados-google-api-client 0.8.7.2 is incompatible with faraday 0.16.2
index e123671d4a222b146b50770e1406d42a18796e99..c87f880e5e56365e4bbec06c211a72cee9e0ee7c 100644 (file)
@@ -121,6 +121,7 @@ type Cluster struct {
                TrashSweepInterval           Duration
                TrustAllContent              bool
                ForwardSlashNameSubstitution string
+               S3FolderObjects              bool
 
                BlobMissingReport        string
                BalancePeriod            Duration
index d06aba3695adc37f3d74057de2568778bcd9f9c9..5e57fed3beab3281b1d498936edb4eed813398ec 100644 (file)
@@ -31,6 +31,10 @@ var (
        ErrPermission        = os.ErrPermission
 )
 
+type syncer interface {
+       Sync() error
+}
+
 // A File is an *os.File-like interface for reading and writing files
 // in a FileSystem.
 type File interface {
@@ -299,6 +303,22 @@ func (n *treenode) Readdir() (fi []os.FileInfo, err error) {
        return
 }
 
+func (n *treenode) Sync() error {
+       n.RLock()
+       defer n.RUnlock()
+       for _, inode := range n.inodes {
+               syncer, ok := inode.(syncer)
+               if !ok {
+                       return ErrInvalidOperation
+               }
+               err := syncer.Sync()
+               if err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
 type fileSystem struct {
        root inode
        fsBackend
@@ -576,8 +596,11 @@ func (fs *fileSystem) remove(name string, recursive bool) error {
 }
 
 func (fs *fileSystem) Sync() error {
-       log.Printf("TODO: sync fileSystem")
-       return ErrInvalidOperation
+       if syncer, ok := fs.root.(syncer); ok {
+               return syncer.Sync()
+       } else {
+               return ErrInvalidOperation
+       }
 }
 
 func (fs *fileSystem) Flush(string, bool) error {
index 37bd494914df507dc9fc193576dc9e0afcc98ea9..0edc48162b1a031b8487ac9b38997c0a4b6f3f4d 100644 (file)
@@ -121,6 +121,62 @@ func (fs *collectionFileSystem) newNode(name string, perm os.FileMode, modTime t
        }
 }
 
+func (fs *collectionFileSystem) Child(name string, replace func(inode) (inode, error)) (inode, error) {
+       return fs.rootnode().Child(name, replace)
+}
+
+func (fs *collectionFileSystem) FS() FileSystem {
+       return fs
+}
+
+func (fs *collectionFileSystem) FileInfo() os.FileInfo {
+       return fs.rootnode().FileInfo()
+}
+
+func (fs *collectionFileSystem) IsDir() bool {
+       return true
+}
+
+func (fs *collectionFileSystem) Lock() {
+       fs.rootnode().Lock()
+}
+
+func (fs *collectionFileSystem) Unlock() {
+       fs.rootnode().Unlock()
+}
+
+func (fs *collectionFileSystem) RLock() {
+       fs.rootnode().RLock()
+}
+
+func (fs *collectionFileSystem) RUnlock() {
+       fs.rootnode().RUnlock()
+}
+
+func (fs *collectionFileSystem) Parent() inode {
+       return fs.rootnode().Parent()
+}
+
+func (fs *collectionFileSystem) Read(_ []byte, ptr filenodePtr) (int, filenodePtr, error) {
+       return 0, ptr, ErrInvalidOperation
+}
+
+func (fs *collectionFileSystem) Write(_ []byte, ptr filenodePtr) (int, filenodePtr, error) {
+       return 0, ptr, ErrInvalidOperation
+}
+
+func (fs *collectionFileSystem) Readdir() ([]os.FileInfo, error) {
+       return fs.rootnode().Readdir()
+}
+
+func (fs *collectionFileSystem) SetParent(parent inode, name string) {
+       fs.rootnode().SetParent(parent, name)
+}
+
+func (fs *collectionFileSystem) Truncate(int64) error {
+       return ErrInvalidOperation
+}
+
 func (fs *collectionFileSystem) Sync() error {
        if fs.uuid == "" {
                return nil
index f01369a885ece3b7c315c832b114caaf77715862..59a6a6ba825e57928e9348c17d971988fa24fc94 100644 (file)
@@ -7,7 +7,6 @@ package arvados
 import (
        "bytes"
        "crypto/md5"
-       "crypto/sha1"
        "errors"
        "fmt"
        "io"
@@ -33,6 +32,9 @@ type keepClientStub struct {
        blocks      map[string][]byte
        refreshable map[string]bool
        onPut       func(bufcopy []byte) // called from PutB, before acquiring lock
+       authToken   string               // client's auth token (used for signing locators)
+       sigkey      string               // blob signing key
+       sigttl      time.Duration        // blob signing ttl
        sync.RWMutex
 }
 
@@ -49,7 +51,7 @@ func (kcs *keepClientStub) ReadAt(locator string, p []byte, off int) (int, error
 }
 
 func (kcs *keepClientStub) PutB(p []byte) (string, int, error) {
-       locator := fmt.Sprintf("%x+%d+A12345@abcde", md5.Sum(p), len(p))
+       locator := SignLocator(fmt.Sprintf("%x+%d", md5.Sum(p), len(p)), kcs.authToken, time.Now().Add(kcs.sigttl), kcs.sigttl, []byte(kcs.sigkey))
        buf := make([]byte, len(p))
        copy(buf, p)
        if kcs.onPut != nil {
@@ -61,9 +63,12 @@ func (kcs *keepClientStub) PutB(p []byte) (string, int, error) {
        return locator, 1, nil
 }
 
-var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`)
+var reRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`)
 
 func (kcs *keepClientStub) LocalLocator(locator string) (string, error) {
+       if strings.Contains(locator, "+A") {
+               return locator, nil
+       }
        kcs.Lock()
        defer kcs.Unlock()
        if strings.Contains(locator, "+R") {
@@ -74,8 +79,9 @@ func (kcs *keepClientStub) LocalLocator(locator string) (string, error) {
                        return "", fmt.Errorf("kcs.refreshable[%q]==false", locator)
                }
        }
-       fakeSig := fmt.Sprintf("+A%x@%x", sha1.Sum(nil), time.Now().Add(time.Hour*24*14).Unix())
-       return localOrRemoteSignature.ReplaceAllLiteralString(locator, fakeSig), nil
+       locator = reRemoteSignature.ReplaceAllLiteralString(locator, "")
+       locator = SignLocator(locator, kcs.authToken, time.Now().Add(kcs.sigttl), kcs.sigttl, []byte(kcs.sigkey))
+       return locator, nil
 }
 
 type CollectionFSSuite struct {
@@ -92,7 +98,11 @@ func (s *CollectionFSSuite) SetUpTest(c *check.C) {
        s.kc = &keepClientStub{
                blocks: map[string][]byte{
                        "3858f62230ac3c915f300c664312c63f": []byte("foobar"),
-               }}
+               },
+               sigkey:    fixtureBlobSigningKey,
+               sigttl:    fixtureBlobSigningTTL,
+               authToken: fixtureActiveToken,
+       }
        s.fs, err = s.coll.FileSystem(s.client, s.kc)
        c.Assert(err, check.IsNil)
 }
index 439eaec7c2a5dbde49f2fd2851551238a22166ec..254b90c812a337de96cb34da01b767dbe7adcc5a 100644 (file)
@@ -32,14 +32,14 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
                        log.Printf("BUG: unhandled error: %s", err)
                        return placeholder
                }
-               cfs, err := coll.FileSystem(fs, fs)
+               newfs, err := coll.FileSystem(fs, fs)
                if err != nil {
                        log.Printf("BUG: unhandled error: %s", err)
                        return placeholder
                }
-               root := cfs.rootnode()
-               root.SetParent(parent, coll.Name)
-               return root
+               cfs := newfs.(*collectionFileSystem)
+               cfs.SetParent(parent, coll.Name)
+               return cfs
        }}
 }
 
@@ -87,6 +87,19 @@ func (dn *deferrednode) Child(name string, replace func(inode) (inode, error)) (
        return dn.realinode().Child(name, replace)
 }
 
+// Sync is a no-op if the real inode hasn't even been created yet.
+func (dn *deferrednode) Sync() error {
+       dn.mtx.Lock()
+       defer dn.mtx.Unlock()
+       if !dn.created {
+               return nil
+       } else if syncer, ok := dn.wrapped.(syncer); ok {
+               return syncer.Sync()
+       } else {
+               return ErrInvalidOperation
+       }
+}
+
 func (dn *deferrednode) Truncate(size int64) error       { return dn.realinode().Truncate(size) }
 func (dn *deferrednode) SetParent(p inode, name string)  { dn.realinode().SetParent(p, name) }
 func (dn *deferrednode) IsDir() bool                     { return dn.currentinode().IsDir() }
index 42322a14a9adda155c75f225ef43e2cdfd96615c..56b5953234784424e51676a90b4c148661cb8c4d 100644 (file)
@@ -15,7 +15,7 @@ import (
 //
 // See (*customFileSystem)MountUsers for example usage.
 type lookupnode struct {
-       inode
+       treenode
        loadOne func(parent inode, name string) (inode, error)
        loadAll func(parent inode) ([]inode, error)
        stale   func(time.Time) bool
@@ -26,6 +26,20 @@ type lookupnode struct {
        staleOne  map[string]time.Time
 }
 
+// Sync flushes pending writes for loaded children and, if successful,
+// triggers a reload on next lookup.
+func (ln *lookupnode) Sync() error {
+       err := ln.treenode.Sync()
+       if err != nil {
+               return err
+       }
+       ln.staleLock.Lock()
+       ln.staleAll = time.Time{}
+       ln.staleOne = nil
+       ln.staleLock.Unlock()
+       return nil
+}
+
 func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
        ln.staleLock.Lock()
        defer ln.staleLock.Unlock()
@@ -36,7 +50,7 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
                        return nil, err
                }
                for _, child := range all {
-                       _, err = ln.inode.Child(child.FileInfo().Name(), func(inode) (inode, error) {
+                       _, err = ln.treenode.Child(child.FileInfo().Name(), func(inode) (inode, error) {
                                return child, nil
                        })
                        if err != nil {
@@ -49,25 +63,47 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
                // newer than ln.staleAll. Reclaim memory.
                ln.staleOne = nil
        }
-       return ln.inode.Readdir()
+       return ln.treenode.Readdir()
 }
 
+// Child rejects (with ErrInvalidArgument) calls to add/replace
+// children, instead calling loadOne when a non-existing child is
+// looked up.
 func (ln *lookupnode) Child(name string, replace func(inode) (inode, error)) (inode, error) {
        ln.staleLock.Lock()
        defer ln.staleLock.Unlock()
        checkTime := time.Now()
+       var existing inode
+       var err error
        if ln.stale(ln.staleAll) && ln.stale(ln.staleOne[name]) {
-               _, err := ln.inode.Child(name, func(inode) (inode, error) {
+               existing, err = ln.treenode.Child(name, func(inode) (inode, error) {
                        return ln.loadOne(ln, name)
                })
-               if err != nil {
-                       return nil, err
+               if err == nil && existing != nil {
+                       if ln.staleOne == nil {
+                               ln.staleOne = map[string]time.Time{name: checkTime}
+                       } else {
+                               ln.staleOne[name] = checkTime
+                       }
                }
-               if ln.staleOne == nil {
-                       ln.staleOne = map[string]time.Time{name: checkTime}
-               } else {
-                       ln.staleOne[name] = checkTime
+       } else {
+               existing, err = ln.treenode.Child(name, nil)
+               if err != nil && !os.IsNotExist(err) {
+                       return existing, err
+               }
+       }
+       if replace != nil {
+               // Let the callback try to delete or replace the
+               // existing node; if it does, return
+               // ErrInvalidArgument.
+               if tryRepl, err := replace(existing); err != nil {
+                       // Propagate error from callback
+                       return existing, err
+               } else if tryRepl != existing {
+                       return existing, ErrInvalidArgument
                }
        }
-       return ln.inode.Child(name, replace)
+       // Return original error from ln.treenode.Child() (it might be
+       // ErrNotExist).
+       return existing, err
 }
index c5eb03360a3877b577168611a8e579329b6abfa8..bf6391a74e4455ea076f82ac49173cb36e825a48 100644 (file)
@@ -6,7 +6,6 @@ package arvados
 
 import (
        "log"
-       "os"
        "strings"
 )
 
@@ -57,7 +56,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
                // both "/" and the substitution string.
        }
        if len(contents.Items) == 0 {
-               return nil, os.ErrNotExist
+               return nil, nil
        }
        coll := contents.Items[0]
 
index 61d82c7fa9f4e442d6492ba8fc0f285df76bd5f2..cb2e54bda261ed590b39b59a90235f6b991787a3 100644 (file)
@@ -200,6 +200,22 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) {
        err = wf.Close()
        c.Check(err, check.IsNil)
 
+       err = project.Sync()
+       c.Check(err, check.IsNil)
+       _, err = s.fs.Open("/home/A Project/oob/test.txt")
+       c.Check(err, check.IsNil)
+
+       // Sync again to mark the project dir as stale, so the
+       // collection gets reloaded from the controller on next
+       // lookup.
+       err = project.Sync()
+       c.Check(err, check.IsNil)
+
+       // Ensure collection was flushed by Sync
+       var latest Collection
+       err = s.client.RequestAndDecode(&latest, "GET", "arvados/v1/collections/"+oob.UUID, nil, nil)
+       c.Check(latest.ManifestText, check.Matches, `.*:test.txt.*\n`)
+
        // Delete test.txt behind s.fs's back by updating the
        // collection record with an empty ManifestText.
        err = s.client.RequestAndDecode(nil, "PATCH", "arvados/v1/collections/"+oob.UUID, nil, map[string]interface{}{
@@ -210,8 +226,6 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) {
        })
        c.Assert(err, check.IsNil)
 
-       err = project.Sync()
-       c.Check(err, check.IsNil)
        _, err = s.fs.Open("/home/A Project/oob/test.txt")
        c.Check(err, check.NotNil)
        _, err = s.fs.Open("/home/A Project/oob")
@@ -221,7 +235,27 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) {
        c.Assert(err, check.IsNil)
 
        err = project.Sync()
-       c.Check(err, check.IsNil)
+       c.Check(err, check.NotNil) // can't update the deleted collection
        _, err = s.fs.Open("/home/A Project/oob")
-       c.Check(err, check.NotNil)
+       c.Check(err, check.IsNil) // parent dir still has old collection -- didn't reload, because Sync failed
+}
+
+func (s *SiteFSSuite) TestProjectUnsupportedOperations(c *check.C) {
+       s.fs.MountByID("by_id")
+       s.fs.MountProject("home", "")
+
+       _, err := s.fs.OpenFile("/home/A Project/newfilename", os.O_CREATE|os.O_RDWR, 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       err = s.fs.Mkdir("/home/A Project/newdirname", 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       err = s.fs.Mkdir("/by_id/newdirname", 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       err = s.fs.Mkdir("/by_id/"+fixtureAProjectUUID+"/newdirname", 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       _, err = s.fs.OpenFile("/home/A Project", 0, 0)
+       c.Check(err, check.IsNil)
 }
index 7826d335c81fa93cfe54bf39a81a66335a65d336..900893aa36420e7c9d2008fff31b36d4bd03e0bf 100644 (file)
@@ -40,7 +40,7 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem {
                        thr:       newThrottle(concurrentWriters),
                },
        }
-       root.inode = &treenode{
+       root.treenode = treenode{
                fs:     fs,
                parent: root,
                fileinfo: fileinfo{
@@ -54,9 +54,9 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem {
 }
 
 func (fs *customFileSystem) MountByID(mount string) {
-       fs.root.inode.Child(mount, func(inode) (inode, error) {
+       fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return &vdirnode{
-                       inode: &treenode{
+                       treenode: treenode{
                                fs:     fs,
                                parent: fs.root,
                                inodes: make(map[string]inode),
@@ -72,18 +72,18 @@ func (fs *customFileSystem) MountByID(mount string) {
 }
 
 func (fs *customFileSystem) MountProject(mount, uuid string) {
-       fs.root.inode.Child(mount, func(inode) (inode, error) {
+       fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return fs.newProjectNode(fs.root, mount, uuid), nil
        })
 }
 
 func (fs *customFileSystem) MountUsers(mount string) {
-       fs.root.inode.Child(mount, func(inode) (inode, error) {
+       fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return &lookupnode{
                        stale:   fs.Stale,
                        loadOne: fs.usersLoadOne,
                        loadAll: fs.usersLoadAll,
-                       inode: &treenode{
+                       treenode: treenode{
                                fs:     fs,
                                parent: fs.root,
                                inodes: make(map[string]inode),
@@ -115,10 +115,7 @@ func (c *Client) SiteFileSystem(kc keepClient) CustomFileSystem {
 }
 
 func (fs *customFileSystem) Sync() error {
-       fs.staleLock.Lock()
-       defer fs.staleLock.Unlock()
-       fs.staleThreshold = time.Now()
-       return nil
+       return fs.root.Sync()
 }
 
 // Stale returns true if information obtained at time t should be
@@ -130,7 +127,7 @@ func (fs *customFileSystem) Stale(t time.Time) bool {
 }
 
 func (fs *customFileSystem) newNode(name string, perm os.FileMode, modTime time.Time) (node inode, err error) {
-       return nil, ErrInvalidOperation
+       return nil, ErrInvalidArgument
 }
 
 func (fs *customFileSystem) mountByID(parent inode, id string) inode {
@@ -149,13 +146,13 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
        if err != nil {
                return nil
        }
-       cfs, err := coll.FileSystem(fs, fs)
+       newfs, err := coll.FileSystem(fs, fs)
        if err != nil {
                return nil
        }
-       root := cfs.rootnode()
-       root.SetParent(parent, id)
-       return root
+       cfs := newfs.(*collectionFileSystem)
+       cfs.SetParent(parent, id)
+       return cfs
 }
 
 func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode {
@@ -163,7 +160,7 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
                stale:   fs.Stale,
                loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
                loadAll: func(parent inode) ([]inode, error) { return fs.projectsLoadAll(parent, uuid) },
-               inode: &treenode{
+               treenode: treenode{
                        fs:     fs,
                        parent: root,
                        inodes: make(map[string]inode),
@@ -176,24 +173,24 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
        }
 }
 
-// vdirnode wraps an inode by ignoring any requests to add/replace
-// children, and calling a create() func when a non-existing child is
-// looked up.
+// vdirnode wraps an inode by rejecting (with ErrInvalidArgument)
+// calls that add/replace children directly, instead calling a
+// create() func when a non-existing child is looked up.
 //
 // create() can return either a new node, which will be added to the
 // treenode, or nil for ENOENT.
 type vdirnode struct {
-       inode
+       treenode
        create func(parent inode, name string) inode
 }
 
 func (vn *vdirnode) Child(name string, replace func(inode) (inode, error)) (inode, error) {
-       return vn.inode.Child(name, func(existing inode) (inode, error) {
+       return vn.treenode.Child(name, func(existing inode) (inode, error) {
                if existing == nil && vn.create != nil {
                        existing = vn.create(vn, name)
                        if existing != nil {
                                existing.SetParent(vn, name)
-                               vn.inode.(*treenode).fileinfo.modTime = time.Now()
+                               vn.treenode.fileinfo.modTime = time.Now()
                        }
                }
                if replace == nil {
index 80cc03df37b88ad82ad246db6d4d7bce68dd68a2..778b12015a6f3964be7db301f30cd8ca5db1a971 100644 (file)
@@ -7,6 +7,7 @@ package arvados
 import (
        "net/http"
        "os"
+       "time"
 
        check "gopkg.in/check.v1"
 )
@@ -22,6 +23,8 @@ const (
        fixtureFooCollectionPDH        = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
        fixtureFooCollection           = "zzzzz-4zz18-fy296fx3hot09f7"
        fixtureNonexistentCollection   = "zzzzz-4zz18-totallynotexist"
+       fixtureBlobSigningKey          = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
+       fixtureBlobSigningTTL          = 336 * time.Hour
 )
 
 var _ = check.Suite(&SiteFSSuite{})
@@ -41,7 +44,11 @@ func (s *SiteFSSuite) SetUpTest(c *check.C) {
        s.kc = &keepClientStub{
                blocks: map[string][]byte{
                        "3858f62230ac3c915f300c664312c63f": []byte("foobar"),
-               }}
+               },
+               sigkey:    fixtureBlobSigningKey,
+               sigttl:    fixtureBlobSigningTTL,
+               authToken: fixtureActiveToken,
+       }
        s.fs = s.client.SiteFileSystem(s.kc)
 }
 
@@ -98,7 +105,7 @@ func (s *SiteFSSuite) TestByUUIDAndPDH(c *check.C) {
        c.Check(names, check.DeepEquals, []string{"baz"})
 
        _, err = s.fs.OpenFile("/by_id/"+fixtureNonexistentCollection, os.O_RDWR|os.O_CREATE, 0755)
-       c.Check(err, check.Equals, ErrInvalidOperation)
+       c.Check(err, check.Equals, ErrInvalidArgument)
        err = s.fs.Rename("/by_id/"+fixtureFooCollection, "/by_id/beep")
        c.Check(err, check.Equals, ErrInvalidArgument)
        err = s.fs.Rename("/by_id/"+fixtureFooCollection+"/foo", "/by_id/beep")
index 793e981b505abb63cef5730b5f299afaf42564a6..0962779658358c95006435c8566f1c8d3c1e97fd 100644 (file)
@@ -7,7 +7,6 @@
 
 # Sensitive files and local configuration
 /config/database.yml
-/config/initializers/omniauth.rb
 /config/application.yml
 
 # asset cache
@@ -35,4 +34,4 @@
 /package-build.version
 
 # Debugger history
-.byebug_history
\ No newline at end of file
+.byebug_history
index 643ca4f587f51bc9b353ab29b4a82869d96578a8..915924e28863c8e5de97af724c60249583c23406 100644 (file)
@@ -227,6 +227,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
        }
 
+       if h.serveS3(w, r) {
+               return
+       }
+
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
        var stripParts int
@@ -509,6 +513,27 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 }
 
+func (h *handler) getClients(reqID, token string) (arv *arvadosclient.ArvadosClient, kc *keepclient.KeepClient, client *arvados.Client, release func(), err error) {
+       arv = h.clientPool.Get()
+       if arv == nil {
+               return nil, nil, nil, nil, h.clientPool.Err()
+       }
+       release = func() { h.clientPool.Put(arv) }
+       arv.ApiToken = token
+       kc, err = keepclient.MakeKeepClient(arv)
+       if err != nil {
+               release()
+               return
+       }
+       kc.RequestID = reqID
+       client = (&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }).WithRequestID(reqID)
+       return
+}
+
 func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []string, credentialsOK, attachment bool) {
        if len(tokens) == 0 {
                w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
@@ -519,25 +544,13 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s
                http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
                return
        }
-       arv := h.clientPool.Get()
-       if arv == nil {
+       _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), tokens[0])
+       if err != nil {
                http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
                return
        }
-       defer h.clientPool.Put(arv)
-       arv.ApiToken = tokens[0]
+       defer release()
 
-       kc, err := keepclient.MakeKeepClient(arv)
-       if err != nil {
-               http.Error(w, "error setting up keep client: "+err.Error(), http.StatusInternalServerError)
-               return
-       }
-       kc.RequestID = r.Header.Get("X-Request-Id")
-       client := (&arvados.Client{
-               APIHost:   arv.ApiServer,
-               AuthToken: arv.ApiToken,
-               Insecure:  arv.ApiInsecure,
-       }).WithRequestID(r.Header.Get("X-Request-Id"))
        fs := client.SiteFileSystem(kc)
        fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
        f, err := fs.Open(r.URL.Path)
index e4028842f0c6b9390715a93c836846f2d9ba753b..647eab1653294311644bdce91faa367bd0ec1832 100644 (file)
@@ -14,6 +14,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/coreos/go-systemd/daemon"
        "github.com/ghodss/yaml"
+       "github.com/sirupsen/logrus"
        log "github.com/sirupsen/logrus"
 )
 
@@ -111,7 +112,7 @@ func main() {
 
        os.Setenv("ARVADOS_API_HOST", cfg.cluster.Services.Controller.ExternalURL.Host)
        srv := &server{Config: cfg}
-       if err := srv.Start(); err != nil {
+       if err := srv.Start(logrus.StandardLogger()); err != nil {
                log.Fatal(err)
        }
        if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
new file mode 100644 (file)
index 0000000..12e294d
--- /dev/null
@@ -0,0 +1,404 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "encoding/xml"
+       "errors"
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "path/filepath"
+       "sort"
+       "strconv"
+       "strings"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/AdRoll/goamz/s3"
+)
+
+const s3MaxKeys = 1000
+
+// serveS3 handles r and returns true if r is a request from an S3
+// client, otherwise it returns false.
+func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
+       var token string
+       if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "AWS ") {
+               split := strings.SplitN(auth[4:], ":", 2)
+               if len(split) < 2 {
+                       w.WriteHeader(http.StatusUnauthorized)
+                       return true
+               }
+               token = split[0]
+       } else if strings.HasPrefix(auth, "AWS4-HMAC-SHA256 ") {
+               for _, cmpt := range strings.Split(auth[17:], ",") {
+                       cmpt = strings.TrimSpace(cmpt)
+                       split := strings.SplitN(cmpt, "=", 2)
+                       if len(split) == 2 && split[0] == "Credential" {
+                               keyandscope := strings.Split(split[1], "/")
+                               if len(keyandscope[0]) > 0 {
+                                       token = keyandscope[0]
+                                       break
+                               }
+                       }
+               }
+               if token == "" {
+                       w.WriteHeader(http.StatusBadRequest)
+                       fmt.Fprintln(w, "invalid V4 signature")
+                       return true
+               }
+       } else {
+               return false
+       }
+
+       _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), token)
+       if err != nil {
+               http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
+               return true
+       }
+       defer release()
+
+       fs := client.SiteFileSystem(kc)
+       fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
+
+       objectNameGiven := strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
+
+       switch {
+       case r.Method == http.MethodGet && !objectNameGiven:
+               // Path is "/{uuid}" or "/{uuid}/", has no object name
+               if _, ok := r.URL.Query()["versioning"]; ok {
+                       // GetBucketVersioning
+                       w.Header().Set("Content-Type", "application/xml")
+                       io.WriteString(w, xml.Header)
+                       fmt.Fprintln(w, `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"/>`)
+               } else {
+                       // ListObjects
+                       h.s3list(w, r, fs)
+               }
+               return true
+       case r.Method == http.MethodGet || r.Method == http.MethodHead:
+               fspath := "/by_id" + r.URL.Path
+               fi, err := fs.Stat(fspath)
+               if r.Method == "HEAD" && !objectNameGiven {
+                       // HeadBucket
+                       if err == nil && fi.IsDir() {
+                               w.WriteHeader(http.StatusOK)
+                       } else if os.IsNotExist(err) {
+                               w.WriteHeader(http.StatusNotFound)
+                       } else {
+                               http.Error(w, err.Error(), http.StatusBadGateway)
+                       }
+                       return true
+               }
+               if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Config.cluster.Collections.S3FolderObjects {
+                       w.Header().Set("Content-Type", "application/x-directory")
+                       w.WriteHeader(http.StatusOK)
+                       return true
+               }
+               if os.IsNotExist(err) ||
+                       (err != nil && err.Error() == "not a directory") ||
+                       (fi != nil && fi.IsDir()) {
+                       http.Error(w, "not found", http.StatusNotFound)
+                       return true
+               }
+               // shallow copy r, and change URL path
+               r := *r
+               r.URL.Path = fspath
+               http.FileServer(fs).ServeHTTP(w, &r)
+               return true
+       case r.Method == http.MethodPut:
+               if !objectNameGiven {
+                       http.Error(w, "missing object name in PUT request", http.StatusBadRequest)
+                       return true
+               }
+               fspath := "by_id" + r.URL.Path
+               var objectIsDir bool
+               if strings.HasSuffix(fspath, "/") {
+                       if !h.Config.cluster.Collections.S3FolderObjects {
+                               http.Error(w, "invalid object name: trailing slash", http.StatusBadRequest)
+                               return true
+                       }
+                       n, err := r.Body.Read(make([]byte, 1))
+                       if err != nil && err != io.EOF {
+                               http.Error(w, fmt.Sprintf("error reading request body: %s", err), http.StatusInternalServerError)
+                               return true
+                       } else if n > 0 {
+                               http.Error(w, "cannot create object with trailing '/' char unless content is empty", http.StatusBadRequest)
+                               return true
+                       } else if strings.SplitN(r.Header.Get("Content-Type"), ";", 2)[0] != "application/x-directory" {
+                               http.Error(w, "cannot create object with trailing '/' char unless Content-Type is 'application/x-directory'", http.StatusBadRequest)
+                               return true
+                       }
+                       // Given PUT "foo/bar/", we'll use "foo/bar/."
+                       // in the "ensure parents exist" block below,
+                       // and then we'll be done.
+                       fspath += "."
+                       objectIsDir = true
+               }
+               fi, err := fs.Stat(fspath)
+               if err != nil && err.Error() == "not a directory" {
+                       // requested foo/bar, but foo is a file
+                       http.Error(w, "object name conflicts with existing object", http.StatusBadRequest)
+                       return true
+               }
+               if strings.HasSuffix(r.URL.Path, "/") && err == nil && !fi.IsDir() {
+                       // requested foo/bar/, but foo/bar is a file
+                       http.Error(w, "object name conflicts with existing object", http.StatusBadRequest)
+                       return true
+               }
+               // create missing parent/intermediate directories, if any
+               for i, c := range fspath {
+                       if i > 0 && c == '/' {
+                               dir := fspath[:i]
+                               if strings.HasSuffix(dir, "/") {
+                                       err = errors.New("invalid object name (consecutive '/' chars)")
+                                       http.Error(w, err.Error(), http.StatusBadRequest)
+                                       return true
+                               }
+                               err = fs.Mkdir(dir, 0755)
+                               if err == arvados.ErrInvalidArgument {
+                                       // Cannot create a directory
+                                       // here.
+                                       err = fmt.Errorf("mkdir %q failed: %w", dir, err)
+                                       http.Error(w, err.Error(), http.StatusBadRequest)
+                                       return true
+                               } else if err != nil && !os.IsExist(err) {
+                                       err = fmt.Errorf("mkdir %q failed: %w", dir, err)
+                                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                                       return true
+                               }
+                       }
+               }
+               if !objectIsDir {
+                       f, err := fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+                       if os.IsNotExist(err) {
+                               f, err = fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+                       }
+                       if err != nil {
+                               err = fmt.Errorf("open %q failed: %w", r.URL.Path, err)
+                               http.Error(w, err.Error(), http.StatusBadRequest)
+                               return true
+                       }
+                       defer f.Close()
+                       _, err = io.Copy(f, r.Body)
+                       if err != nil {
+                               err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err)
+                               http.Error(w, err.Error(), http.StatusBadGateway)
+                               return true
+                       }
+                       err = f.Close()
+                       if err != nil {
+                               err = fmt.Errorf("write to %q failed: close: %w", r.URL.Path, err)
+                               http.Error(w, err.Error(), http.StatusBadGateway)
+                               return true
+                       }
+               }
+               err = fs.Sync()
+               if err != nil {
+                       err = fmt.Errorf("sync failed: %w", err)
+                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                       return true
+               }
+               w.WriteHeader(http.StatusOK)
+               return true
+       default:
+               http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+               return true
+       }
+}
+
+// Call fn on the given path (directory) and its contents, in
+// lexicographic order.
+//
+// If isRoot==true and path is not a directory, return nil.
+//
+// If fn returns filepath.SkipDir when called on a directory, don't
+// descend into that directory.
+func walkFS(fs arvados.CustomFileSystem, path string, isRoot bool, fn func(path string, fi os.FileInfo) error) error {
+       if isRoot {
+               fi, err := fs.Stat(path)
+               if os.IsNotExist(err) || (err == nil && !fi.IsDir()) {
+                       return nil
+               } else if err != nil {
+                       return err
+               }
+               err = fn(path, fi)
+               if err == filepath.SkipDir {
+                       return nil
+               } else if err != nil {
+                       return err
+               }
+       }
+       f, err := fs.Open(path)
+       if os.IsNotExist(err) && isRoot {
+               return nil
+       } else if err != nil {
+               return fmt.Errorf("open %q: %w", path, err)
+       }
+       defer f.Close()
+       if path == "/" {
+               path = ""
+       }
+       fis, err := f.Readdir(-1)
+       if err != nil {
+               return err
+       }
+       sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
+       for _, fi := range fis {
+               err = fn(path+"/"+fi.Name(), fi)
+               if err == filepath.SkipDir {
+                       continue
+               } else if err != nil {
+                       return err
+               }
+               if fi.IsDir() {
+                       err = walkFS(fs, path+"/"+fi.Name(), false, fn)
+                       if err != nil {
+                               return err
+                       }
+               }
+       }
+       return nil
+}
+
+var errDone = errors.New("done")
+
+func (h *handler) s3list(w http.ResponseWriter, r *http.Request, fs arvados.CustomFileSystem) {
+       var params struct {
+               bucket    string
+               delimiter string
+               marker    string
+               maxKeys   int
+               prefix    string
+       }
+       params.bucket = strings.SplitN(r.URL.Path[1:], "/", 2)[0]
+       params.delimiter = r.FormValue("delimiter")
+       params.marker = r.FormValue("marker")
+       if mk, _ := strconv.ParseInt(r.FormValue("max-keys"), 10, 64); mk > 0 && mk < s3MaxKeys {
+               params.maxKeys = int(mk)
+       } else {
+               params.maxKeys = s3MaxKeys
+       }
+       params.prefix = r.FormValue("prefix")
+
+       bucketdir := "by_id/" + params.bucket
+       // walkpath is the directory (relative to bucketdir) we need
+       // to walk: the innermost directory that is guaranteed to
+       // contain all paths that have the requested prefix. Examples:
+       // prefix "foo/bar"  => walkpath "foo"
+       // prefix "foo/bar/" => walkpath "foo/bar"
+       // prefix "foo"      => walkpath ""
+       // prefix ""         => walkpath ""
+       walkpath := params.prefix
+       if cut := strings.LastIndex(walkpath, "/"); cut >= 0 {
+               walkpath = walkpath[:cut]
+       } else {
+               walkpath = ""
+       }
+
+       resp := s3.ListResp{
+               Name:      strings.SplitN(r.URL.Path[1:], "/", 2)[0],
+               Prefix:    params.prefix,
+               Delimiter: params.delimiter,
+               Marker:    params.marker,
+               MaxKeys:   params.maxKeys,
+       }
+       commonPrefixes := map[string]bool{}
+       err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
+               if path == bucketdir {
+                       return nil
+               }
+               path = path[len(bucketdir)+1:]
+               filesize := fi.Size()
+               if fi.IsDir() {
+                       path += "/"
+                       filesize = 0
+               }
+               if len(path) <= len(params.prefix) {
+                       if path > params.prefix[:len(path)] {
+                               // with prefix "foobar", walking "fooz" means we're done
+                               return errDone
+                       }
+                       if path < params.prefix[:len(path)] {
+                               // with prefix "foobar", walking "foobag" is pointless
+                               return filepath.SkipDir
+                       }
+                       if fi.IsDir() && !strings.HasPrefix(params.prefix+"/", path) {
+                               // with prefix "foo/bar", walking "fo"
+                               // is pointless (but walking "foo" or
+                               // "foo/bar" is necessary)
+                               return filepath.SkipDir
+                       }
+                       if len(path) < len(params.prefix) {
+                               // can't skip anything, and this entry
+                               // isn't in the results, so just
+                               // continue descent
+                               return nil
+                       }
+               } else {
+                       if path[:len(params.prefix)] > params.prefix {
+                               // with prefix "foobar", nothing we
+                               // see after "foozzz" is relevant
+                               return errDone
+                       }
+               }
+               if path < params.marker || path < params.prefix {
+                       return nil
+               }
+               if fi.IsDir() && !h.Config.cluster.Collections.S3FolderObjects {
+                       // Note we don't add anything to
+                       // commonPrefixes here even if delimiter is
+                       // "/". We descend into the directory, and
+                       // return a commonPrefix only if we end up
+                       // finding a regular file inside it.
+                       return nil
+               }
+               if params.delimiter != "" {
+                       idx := strings.Index(path[len(params.prefix):], params.delimiter)
+                       if idx >= 0 {
+                               // with prefix "foobar" and delimiter
+                               // "z", when we hit "foobar/baz", we
+                               // add "foobar/baz" to commonPrefixes
+                               // and stop descending.
+                               commonPrefixes[path[:len(params.prefix)+idx+1]] = true
+                               return filepath.SkipDir
+                       }
+               }
+               if len(resp.Contents)+len(commonPrefixes) >= params.maxKeys {
+                       resp.IsTruncated = true
+                       if params.delimiter != "" {
+                               resp.NextMarker = path
+                       }
+                       return errDone
+               }
+               resp.Contents = append(resp.Contents, s3.Key{
+                       Key:          path,
+                       LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
+                       Size:         filesize,
+               })
+               return nil
+       })
+       if err != nil && err != errDone {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
+       if params.delimiter != "" {
+               for prefix := range commonPrefixes {
+                       resp.CommonPrefixes = append(resp.CommonPrefixes, prefix)
+                       sort.Strings(resp.CommonPrefixes)
+               }
+       }
+       wrappedResp := struct {
+               XMLName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
+               s3.ListResp
+       }{"", resp}
+       w.Header().Set("Content-Type", "application/xml")
+       io.WriteString(w, xml.Header)
+       if err := xml.NewEncoder(w).Encode(wrappedResp); err != nil {
+               ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response")
+       }
+}
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
new file mode 100644 (file)
index 0000000..73553ff
--- /dev/null
@@ -0,0 +1,546 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "bytes"
+       "crypto/rand"
+       "fmt"
+       "io/ioutil"
+       "net/http"
+       "os"
+       "strings"
+       "sync"
+       "time"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
+       "github.com/AdRoll/goamz/aws"
+       "github.com/AdRoll/goamz/s3"
+       check "gopkg.in/check.v1"
+)
+
+type s3stage struct {
+       arv        *arvados.Client
+       ac         *arvadosclient.ArvadosClient
+       kc         *keepclient.KeepClient
+       proj       arvados.Group
+       projbucket *s3.Bucket
+       coll       arvados.Collection
+       collbucket *s3.Bucket
+}
+
+func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
+       var proj arvados.Group
+       var coll arvados.Collection
+       arv := arvados.NewClientFromEnv()
+       arv.AuthToken = arvadostest.ActiveToken
+       err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+               "group": map[string]interface{}{
+                       "group_class": "project",
+                       "name":        "keep-web s3 test",
+               },
+               "ensure_unique_name": true,
+       })
+       c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
+               "owner_uuid":    proj.UUID,
+               "name":          "keep-web s3 test collection",
+               "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+       }})
+       c.Assert(err, check.IsNil)
+       ac, err := arvadosclient.New(arv)
+       c.Assert(err, check.IsNil)
+       kc, err := keepclient.MakeKeepClient(ac)
+       c.Assert(err, check.IsNil)
+       fs, err := coll.FileSystem(arv, kc)
+       c.Assert(err, check.IsNil)
+       f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
+       c.Assert(err, check.IsNil)
+       _, err = f.Write([]byte("⛵\n"))
+       c.Assert(err, check.IsNil)
+       err = f.Close()
+       c.Assert(err, check.IsNil)
+       err = fs.Sync()
+       c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
+       c.Assert(err, check.IsNil)
+
+       auth := aws.NewAuth(arvadostest.ActiveTokenV2, arvadostest.ActiveTokenV2, "", time.Now().Add(time.Hour))
+       region := aws.Region{
+               Name:       s.testServer.Addr,
+               S3Endpoint: "http://" + s.testServer.Addr,
+       }
+       client := s3.New(*auth, region)
+       return s3stage{
+               arv:  arv,
+               ac:   ac,
+               kc:   kc,
+               proj: proj,
+               projbucket: &s3.Bucket{
+                       S3:   client,
+                       Name: proj.UUID,
+               },
+               coll: coll,
+               collbucket: &s3.Bucket{
+                       S3:   client,
+                       Name: coll.UUID,
+               },
+       }
+}
+
+func (stage s3stage) teardown(c *check.C) {
+       if stage.coll.UUID != "" {
+               err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
+               c.Check(err, check.IsNil)
+       }
+       if stage.proj.UUID != "" {
+               err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
+               c.Check(err, check.IsNil)
+       }
+}
+
+func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
+               c.Logf("bucket %s", bucket.Name)
+               exists, err := bucket.Exists("")
+               c.Check(err, check.IsNil)
+               c.Check(exists, check.Equals, true)
+       }
+}
+
+func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3GetObject(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
+       rdr, err := bucket.GetReader(prefix + "emptyfile")
+       c.Assert(err, check.IsNil)
+       buf, err := ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       c.Check(len(buf), check.Equals, 0)
+       err = rdr.Close()
+       c.Check(err, check.IsNil)
+
+       // GetObject
+       rdr, err = bucket.GetReader(prefix + "missingfile")
+       c.Check(err, check.ErrorMatches, `404 Not Found`)
+
+       // HeadObject
+       exists, err := bucket.Exists(prefix + "missingfile")
+       c.Check(err, check.IsNil)
+       c.Check(exists, check.Equals, false)
+
+       // GetObject
+       rdr, err = bucket.GetReader(prefix + "sailboat.txt")
+       c.Assert(err, check.IsNil)
+       buf, err = ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       c.Check(buf, check.DeepEquals, []byte("⛵\n"))
+       err = rdr.Close()
+       c.Check(err, check.IsNil)
+
+       // HeadObject
+       exists, err = bucket.Exists(prefix + "sailboat.txt")
+       c.Check(err, check.IsNil)
+       c.Check(exists, check.Equals, true)
+}
+
+func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectSuccess(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) {
+       for _, trial := range []struct {
+               path        string
+               size        int
+               contentType string
+       }{
+               {
+                       path:        "newfile",
+                       size:        128000000,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir/newfile",
+                       size:        1 << 26,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir1/newdir2/newfile",
+                       size:        0,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir1/newdir2/newdir3/",
+                       size:        0,
+                       contentType: "application/x-directory",
+               },
+       } {
+               c.Logf("=== %v", trial)
+
+               objname := prefix + trial.path
+
+               _, err := bucket.GetReader(objname)
+               c.Assert(err, check.ErrorMatches, `404 Not Found`)
+
+               buf := make([]byte, trial.size)
+               rand.Read(buf)
+
+               err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
+               c.Check(err, check.IsNil)
+
+               rdr, err := bucket.GetReader(objname)
+               if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects {
+                       c.Check(err, check.NotNil)
+                       continue
+               } else if !c.Check(err, check.IsNil) {
+                       continue
+               }
+               buf2, err := ioutil.ReadAll(rdr)
+               c.Check(err, check.IsNil)
+               c.Check(buf2, check.HasLen, len(buf))
+               c.Check(bytes.Equal(buf, buf2), check.Equals, true)
+       }
+}
+
+func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       bucket := stage.projbucket
+
+       for _, trial := range []struct {
+               path        string
+               size        int
+               contentType string
+       }{
+               {
+                       path:        "newfile",
+                       size:        1234,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir/newfile",
+                       size:        1234,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir2/",
+                       size:        0,
+                       contentType: "application/x-directory",
+               },
+       } {
+               c.Logf("=== %v", trial)
+
+               _, err := bucket.GetReader(trial.path)
+               c.Assert(err, check.ErrorMatches, `404 Not Found`)
+
+               buf := make([]byte, trial.size)
+               rand.Read(buf)
+
+               err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
+               c.Check(err, check.ErrorMatches, `400 Bad Request`)
+
+               _, err = bucket.GetReader(trial.path)
+               c.Assert(err, check.ErrorMatches, `404 Not Found`)
+       }
+}
+
+func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectFailure(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
+       s.testServer.Config.cluster.Collections.S3FolderObjects = false
+       var wg sync.WaitGroup
+       for _, trial := range []struct {
+               path string
+       }{
+               {
+                       path: "emptyfile/newname", // emptyfile exists, see s3setup()
+               }, {
+                       path: "emptyfile/", // emptyfile exists, see s3setup()
+               }, {
+                       path: "emptydir", // dir already exists, see s3setup()
+               }, {
+                       path: "emptydir/",
+               }, {
+                       path: "emptydir//",
+               }, {
+                       path: "newdir/",
+               }, {
+                       path: "newdir//",
+               }, {
+                       path: "/",
+               }, {
+                       path: "//",
+               }, {
+                       path: "foo//bar",
+               }, {
+                       path: "",
+               },
+       } {
+               trial := trial
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       c.Logf("=== %v", trial)
+
+                       objname := prefix + trial.path
+
+                       buf := make([]byte, 1234)
+                       rand.Read(buf)
+
+                       err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
+                       if !c.Check(err, check.ErrorMatches, `400 Bad.*`, check.Commentf("PUT %q should fail", objname)) {
+                               return
+                       }
+
+                       if objname != "" && objname != "/" {
+                               _, err = bucket.GetReader(objname)
+                               c.Check(err, check.ErrorMatches, `404 Not Found`, check.Commentf("GET %q should return 404", objname))
+                       }
+               }()
+       }
+       wg.Wait()
+}
+
+func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
+       fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
+       c.Assert(err, check.IsNil)
+       for d := 0; d < dirs; d++ {
+               dir := fmt.Sprintf("dir%d", d)
+               c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
+               for i := 0; i < filesPerDir; i++ {
+                       f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
+                       c.Assert(err, check.IsNil)
+                       c.Assert(f.Close(), check.IsNil)
+               }
+       }
+       c.Assert(fs.Sync(), check.IsNil)
+}
+
+func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
+               req, err := http.NewRequest("GET", bucket.URL("/"), nil)
+               req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
+               req.URL.RawQuery = "versioning"
+               resp, err := http.DefaultClient.Do(req)
+               c.Assert(err, check.IsNil)
+               c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
+               buf, err := ioutil.ReadAll(resp.Body)
+               c.Assert(err, check.IsNil)
+               c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<VersioningConfiguration xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"/>\n")
+       }
+}
+
+func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       var markers int
+       for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
+               dirs := 2
+               filesPerDir := 1001
+               stage.writeBigDirs(c, dirs, filesPerDir)
+               // Total # objects is:
+               //                 2 file entries from s3setup (emptyfile and sailboat.txt)
+               //                +1 fake "directory" marker from s3setup (emptydir) (if enabled)
+             +dirs fake "directory" markers from writeBigDirs (dir0/, dir1/) (if enabled)
+               // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
+               s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
+               s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
+               s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
+       }
+}
+func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
+       c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects)
+       expectPageSize := pageSize
+       if expectPageSize > 1000 {
+               expectPageSize = 1000
+       }
+       gotKeys := map[string]s3.Key{}
+       nextMarker := ""
+       pages := 0
+       for {
+               resp, err := bucket.List(prefix, "", nextMarker, pageSize)
+               if !c.Check(err, check.IsNil) {
+                       break
+               }
+               c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
+               if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
+                       break
+               }
+               for _, key := range resp.Contents {
+                       gotKeys[key.Key] = key
+                       if strings.Contains(key.Key, "sailboat.txt") {
+                               c.Check(key.Size, check.Equals, int64(4))
+                       }
+               }
+               if !resp.IsTruncated {
+                       c.Check(resp.NextMarker, check.Equals, "")
+                       break
+               }
+               if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
+                       break
+               }
+               nextMarker = resp.NextMarker
+       }
+       c.Check(len(gotKeys), check.Equals, expectFiles)
+}
+
+func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
+       for _, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
+               s.testS3CollectionListRollup(c)
+       }
+}
+
+func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       dirs := 2
+       filesPerDir := 500
+       stage.writeBigDirs(c, dirs, filesPerDir)
+       err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
+       c.Assert(err, check.IsNil)
+       var allfiles []string
+       for marker := ""; ; {
+               resp, err := stage.collbucket.List("", "", marker, 20000)
+               c.Check(err, check.IsNil)
+               for _, key := range resp.Contents {
+                       if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
+                               allfiles = append(allfiles, key.Key)
+                       }
+               }
+               marker = resp.NextMarker
+               if marker == "" {
+                       break
+               }
+       }
+       markers := 0
+       if s.testServer.Config.cluster.Collections.S3FolderObjects {
+               markers = 1
+       }
+       c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)
+
+       gotDirMarker := map[string]bool{}
+       for _, name := range allfiles {
+               isDirMarker := strings.HasSuffix(name, "/")
+               if markers == 0 {
+                       c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name))
+               } else if isDirMarker {
+                       gotDirMarker[name] = true
+               } else if i := strings.LastIndex(name, "/"); i >= 0 {
+                       c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name))
+                       gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker
+               }
+       }
+
+       for _, trial := range []struct {
+               prefix    string
+               delimiter string
+               marker    string
+       }{
+               {"", "", ""},
+               {"di", "/", ""},
+               {"di", "r", ""},
+               {"di", "n", ""},
+               {"dir0", "/", ""},
+               {"dir0/", "/", ""},
+               {"dir0/f", "/", ""},
+               {"dir0", "", ""},
+               {"dir0/", "", ""},
+               {"dir0/f", "", ""},
+               {"dir0", "/", "dir0/file14.txt"},       // no commonprefixes
+               {"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
+               {"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
+               {"", "", "dir1/file498.txt"},           // last page of results
+               {"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
+               {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
+               {"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
+               {"dir2", "/", ""},                      // prefix "dir2" does not exist
+               {"", "/", ""},
+       } {
+               c.Logf("\n\n=== trial %+v markers=%d", trial, markers)
+
+               maxKeys := 20
+               resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
+               c.Check(err, check.IsNil)
+               if resp.IsTruncated && trial.delimiter == "" {
+                       // goamz List method fills in the missing
+                       // NextMarker field if resp.IsTruncated, so
+                       // now we can't really tell whether it was
+                       // sent by the server or by goamz. In cases
+                       // where it should be empty but isn't, assume
+                       // it's goamz's fault.
+                       resp.NextMarker = ""
+               }
+
+               var expectKeys []string
+               var expectPrefixes []string
+               var expectNextMarker string
+               var expectTruncated bool
+               for _, key := range allfiles {
+                       full := len(expectKeys)+len(expectPrefixes) >= maxKeys
+                       if !strings.HasPrefix(key, trial.prefix) || key < trial.marker {
+                               continue
+                       } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
+                               prefix := key[:len(trial.prefix)+idx+1]
+                               if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
+                                       // same prefix as previous key
+                               } else if full {
+                                       expectNextMarker = key
+                                       expectTruncated = true
+                               } else {
+                                       expectPrefixes = append(expectPrefixes, prefix)
+                               }
+                       } else if full {
+                               if trial.delimiter != "" {
+                                       expectNextMarker = key
+                               }
+                               expectTruncated = true
+                               break
+                       } else {
+                               expectKeys = append(expectKeys, key)
+                       }
+               }
+
+               var gotKeys []string
+               for _, key := range resp.Contents {
+                       gotKeys = append(gotKeys, key.Key)
+               }
+               var gotPrefixes []string
+               for _, prefix := range resp.CommonPrefixes {
+                       gotPrefixes = append(gotPrefixes, prefix)
+               }
+               commentf := check.Commentf("trial %+v markers=%d", trial, markers)
+               c.Check(gotKeys, check.DeepEquals, expectKeys, commentf)
+               c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf)
+               c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf)
+               c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf)
+               c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
+       }
+}
index 46dc3d30179343e29edea208588103fe947c3c08..8f623c627d067f843f2746a2b8b64248006b1a18 100644 (file)
@@ -20,12 +20,12 @@ type server struct {
        Config *Config
 }
 
-func (srv *server) Start() error {
+func (srv *server) Start(logger *logrus.Logger) error {
        h := &handler{Config: srv.Config}
        reg := prometheus.NewRegistry()
        h.Config.Cache.registry = reg
-       ctx := ctxlog.Context(context.Background(), logrus.StandardLogger())
-       mh := httpserver.Instrument(reg, nil, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h))))
+       ctx := ctxlog.Context(context.Background(), logger)
+       mh := httpserver.Instrument(reg, logger, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h))))
        h.MetricsAPI = mh.ServeAPI(h.Config.cluster.ManagementToken, http.NotFoundHandler())
        srv.Handler = mh
        var listen arvados.URL
index bca7ff49fa0820c8affd818dd6c8b26ceca81755..c37852a128bbaa9571ebf1527f3f9f6b6cee41ae 100644 (file)
@@ -442,7 +442,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
        cfg.cluster.ManagementToken = arvadostest.ManagementToken
        cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
        s.testServer = &server{Config: cfg}
-       err = s.testServer.Start()
+       err = s.testServer.Start(ctxlog.TestLogger(c))
        c.Assert(err, check.Equals, nil)
 }
 
index 292a4fd746a6697fafa0bda5155e766bab79618d..5abaa90e36d1cfc60d286a336fb4551b6e1f5ee6 100755 (executable)
@@ -206,7 +206,8 @@ run() {
               --publish=9002:9002
               --publish=25101:25101
               --publish=8001:8001
-              --publish=8002:8002"
+              --publish=8002:8002
+              --publish=45000-45020:45000-45020"
     else
         PUBLIC=""
     fi
index f7fc1a07b401aba074f954044e2bde6ae88ef258..c8db9499cda716240ed8ba421d9f72d496a0bf27 100644 (file)
@@ -1,6 +1,5 @@
 {
   "variables": {
-    "storage_account": null,
     "resource_group": null,
     "client_id": "{{env `ARM_CLIENT_ID`}}",
     "client_secret": "{{env `ARM_CLIENT_SECRET`}}",
       "subscription_id": "{{user `subscription_id`}}",
       "tenant_id": "{{user `tenant_id`}}",
 
-      "resource_group_name": "{{user `resource_group`}}",
-      "storage_account": "{{user `storage_account`}}",
-
-      "capture_container_name": "images",
-      "capture_name_prefix": "{{user `arvados_cluster`}}-compute",
+      "managed_image_resource_group_name": "{{user `resource_group`}}",
+      "managed_image_name": "{{user `arvados_cluster`}}-compute-v{{ timestamp }}",
 
       "ssh_username": "{{user `ssh_user`}}",
       "ssh_private_key_file": "{{user `ssh_private_key_file`}}",
index e8265ae198316659aa56996de20cfa6f6f4612ed..030eb410b8d52fcf7c1e72e2a8be79c0af90bf7d 100755 (executable)
@@ -43,8 +43,6 @@ Options:
       Azure secrets file which will be sourced from this script
   --azure-resource-group (default: false, required if building for Azure)
       Azure resource group
-  --azure-storage-account (default: false, required if building for Azure)
-      Azure storage account
   --azure-location (default: false, required if building for Azure)
       Azure location, e.g. centralus, eastus, westeurope
   --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
@@ -76,7 +74,6 @@ GCP_ACCOUNT_FILE=
 GCP_ZONE=
 AZURE_SECRETS_FILE=
 AZURE_RESOURCE_GROUP=
-AZURE_STORAGE_ACCOUNT=
 AZURE_LOCATION=
 AZURE_CLOUD_ENVIRONMENT=
 DEBUG=
@@ -86,7 +83,7 @@ AWS_DEFAULT_REGION=us-east-1
 PUBLIC_KEY_FILE=
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-storage-account:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
+    help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -139,9 +136,6 @@ while [ $# -gt 0 ]; do
         --azure-resource-group)
             AZURE_RESOURCE_GROUP="$2"; shift
             ;;
-        --azure-storage-account)
-            AZURE_STORAGE_ACCOUNT="$2"; shift
-            ;;
         --azure-location)
             AZURE_LOCATION="$2"; shift
             ;;
@@ -248,9 +242,6 @@ fi
 if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then
   EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP"
 fi
-if [[ "$AZURE_STORAGE_ACCOUNT" != "" ]]; then
-  EXTRA2+=" -var storage_account=$AZURE_STORAGE_ACCOUNT"
-fi
 if [[ "$AZURE_LOCATION" != "" ]]; then
   EXTRA2+=" -var location=$AZURE_LOCATION"
 fi