14199: Merge branch 'master' into 14199-copy-from-remote
author Tom Clegg <tclegg@veritasgenetics.com>
Tue, 9 Oct 2018 19:56:26 +0000 (15:56 -0400)
committer Tom Clegg <tclegg@veritasgenetics.com>
Tue, 9 Oct 2018 19:56:26 +0000 (15:56 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

27 files changed:
apps/workbench/app/assets/javascripts/models/session_db.js
apps/workbench/test/controllers/collections_controller_test.rb
apps/workbench/test/integration/collections_test.rb
apps/workbench/test/integration/jobs_test.rb
apps/workbench/test/integration_helper.rb
apps/workbench/test/test_helper.rb
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/application_controller.rb
services/api/app/controllers/user_sessions_controller.rb
services/api/test/factories/api_client.rb
services/api/test/factories/api_client_authorization.rb
services/api/test/factories/group.rb
services/api/test/factories/link.rb
services/api/test/factories/user.rb
services/api/test/test_helper.rb
tools/arvbox/lib/arvbox/docker/runsu.sh
tools/crunchstat-summary/crunchstat_summary/reader.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report [new file with mode: 0644]
tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz [deleted file]
tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz.report [deleted file]
tools/crunchstat-summary/tests/test_examples.py

index 5d42fdf07f668c3f1f25ad26543a4ecc2b26f05c..fd1cdfe1490c3c82c46e5dfbb0b09094309ea9d1 100644 (file)
@@ -157,7 +157,11 @@ window.SessionDB = function() {
             var session = db.loadLocal();
             return db.tokenUUID().then(function(token_uuid) {
                 var shaObj = new jsSHA("SHA-1", "TEXT");
-                shaObj.setHMACKey(session.token, "TEXT");
+                var secret = session.token;
+                if (session.token.startsWith("v2/")) {
+                    secret = session.token.split("/")[2];
+                }
+                shaObj.setHMACKey(secret, "TEXT");
                 shaObj.update(uuid_prefix);
                 var hmac = shaObj.getHMAC("HEX");
                 return 'v2/' + token_uuid + '/' + hmac;
@@ -255,7 +259,14 @@ window.SessionDB = function() {
             var cache = db.tokenUUIDCache;
             if (!cache) {
                 var session = db.loadLocal();
-                return db.request(session, '/arvados/v1/api_client_authorizations', {
+                if (session.token.startsWith("v2/")) {
+                    var uuid = session.token.split("/")[1]
+                    db.tokenUUIDCache = uuid;
+                    return new Promise(function(resolve, reject) {
+                        resolve(uuid);
+                    });
+                }
+                return db.request(session, 'arvados/v1/api_client_authorizations', {
                     data: {
                         filters: JSON.stringify([['api_token', '=', session.token]])
                     }
index 4f3e098d5c57875a29667b7d88fcda805505c725..3ff02a82a2711e983ce507421b2e290317effca8 100644 (file)
@@ -17,7 +17,7 @@ class CollectionsControllerTest < ActionController::TestCase
   def config_anonymous enable
     Rails.configuration.anonymous_user_token =
       if enable
-        api_fixture('api_client_authorizations')['anonymous']['api_token']
+        api_token('anonymous')
       else
         false
       end
@@ -43,7 +43,7 @@ class CollectionsControllerTest < ActionController::TestCase
 
   def assert_session_for_auth(client_auth)
     api_token =
-      api_fixture('api_client_authorizations')[client_auth.to_s]['api_token']
+      self.api_token(client_auth.to_s)
     assert_hash_includes(session, {arvados_api_token: api_token},
                          "session token does not belong to #{client_auth}")
   end
@@ -122,8 +122,7 @@ class CollectionsControllerTest < ActionController::TestCase
 
   test "viewing collection files with a reader token" do
     params = collection_params(:foo_file)
-    params[:reader_token] = api_fixture("api_client_authorizations",
-                                        "active_all_collections", "api_token")
+    params[:reader_token] = api_token("active_all_collections")
     get(:show_file_links, params)
     assert_response :redirect
     assert_no_session
@@ -132,8 +131,7 @@ class CollectionsControllerTest < ActionController::TestCase
   test "fetching collection file with reader token" do
     setup_for_keep_web
     params = collection_params(:foo_file, "foo")
-    params[:reader_token] = api_fixture("api_client_authorizations",
-                                        "active_all_collections", "api_token")
+    params[:reader_token] = api_token("active_all_collections")
     get(:show_file, params)
     assert_response :redirect
     assert_match /foo/, response.redirect_url
@@ -178,7 +176,7 @@ class CollectionsControllerTest < ActionController::TestCase
   test "getting a file from Keep with a good reader token" do
     setup_for_keep_web
     params = collection_params(:foo_file, 'foo')
-    read_token = api_fixture('api_client_authorizations')['active']['api_token']
+    read_token = api_token('active')
     params[:reader_token] = read_token
     get(:show_file, params)
     assert_response :redirect
@@ -192,7 +190,7 @@ class CollectionsControllerTest < ActionController::TestCase
       config_anonymous anon
       params = collection_params(:foo_file, 'foo')
       params[:reader_token] =
-        api_fixture('api_client_authorizations')['active_noscope']['api_token']
+        api_token('active_noscope')
       get(:show_file, params)
       if anon
         # Some files can be shown without a valid token, but not this one.
@@ -209,7 +207,7 @@ class CollectionsControllerTest < ActionController::TestCase
     setup_for_keep_web
     params = collection_params(:foo_file, 'foo')
     sess = session_for(:expired)
-    read_token = api_fixture('api_client_authorizations')['active']['api_token']
+    read_token = api_token('active')
     params[:reader_token] = read_token
     get(:show_file, params, sess)
     assert_response :redirect
@@ -475,20 +473,20 @@ class CollectionsControllerTest < ActionController::TestCase
   %w(uuid portable_data_hash).each do |id_type|
     test "Redirect to keep_web_url via #{id_type}" do
       setup_for_keep_web
-      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      tok = api_token('active')
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
       assert_response :redirect
-      assert_equal "https://#{id.sub '+', '-'}.example/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+      assert_equal "https://#{id.sub '+', '-'}.example/_/w%20a%20z?api_token=#{URI.escape tok, '/'}", @response.redirect_url
     end
 
     test "Redirect to keep_web_url via #{id_type} with reader token" do
       setup_for_keep_web
-      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      tok = api_token('active')
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
       assert_response :redirect
-      assert_equal "https://#{id.sub '+', '-'}.example/t=#{tok}/_/w%20a%20z", @response.redirect_url
+      assert_equal "https://#{id.sub '+', '-'}.example/t=#{URI.escape tok}/_/w%20a%20z", @response.redirect_url
     end
 
     test "Redirect to keep_web_url via #{id_type} with no token" do
@@ -516,22 +514,22 @@ class CollectionsControllerTest < ActionController::TestCase
     test "Redirect to keep_web_download_url via #{id_type}" do
       setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
                          'https://download.example/c=%{uuid_or_pdh}')
-      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      tok = api_token('active')
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
       assert_response :redirect
-      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{URI.escape tok, '/'}", @response.redirect_url
     end
 
     test "Redirect to keep_web_url via #{id_type} when trust_all_content enabled" do
       Rails.configuration.trust_all_content = true
       setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
                          'https://download.example/c=%{uuid_or_pdh}')
-      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      tok = api_token('active')
       id = api_fixture('collections')['w_a_z_file'][id_type]
       get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
       assert_response :redirect
-      assert_equal "https://collections.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+      assert_equal "https://collections.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{URI.escape tok, '/'}", @response.redirect_url
     end
   end
 
@@ -548,7 +546,7 @@ class CollectionsControllerTest < ActionController::TestCase
       config_anonymous anon
       setup_for_keep_web('https://collections.example/c=%{uuid_or_pdh}',
                          'https://download.example/c=%{uuid_or_pdh}')
-      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      tok = api_token('active')
       id = api_fixture('collections')['public_text_file']['uuid']
       get :show_file, {
         uuid: id,
@@ -558,7 +556,7 @@ class CollectionsControllerTest < ActionController::TestCase
       assert_response :redirect
       expect_url = "https://download.example/c=#{id.sub '+', '-'}/_/Hello%20world.txt"
       if not anon
-        expect_url += "?api_token=#{tok}"
+        expect_url += "?api_token=#{URI.escape tok, '/'}"
       end
       assert_equal expect_url, @response.redirect_url
     end
@@ -577,11 +575,11 @@ class CollectionsControllerTest < ActionController::TestCase
     test "Redirect preview to keep_web_download_url when preview is disabled and trust_all_content is #{trust_all_content}" do
       Rails.configuration.trust_all_content = trust_all_content
       setup_for_keep_web false, 'https://download.example/c=%{uuid_or_pdh}'
-      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      tok = api_token('active')
       id = api_fixture('collections')['w_a_z_file']['uuid']
       get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
       assert_response :redirect
-      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
+      assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{URI.escape tok, '/'}", @response.redirect_url
     end
   end
 
index 9aa868c2b8b90ee2dab6a1bbf94dae39d305df96..6dd3c526968f3f574ee4b5fcba5746f78fe57a5e 100644 (file)
@@ -57,7 +57,7 @@ class CollectionsTest < ActionDispatch::IntegrationTest
   test "can download an entire collection with a reader token" do
     use_keep_web_config
 
-    token = api_fixture('api_client_authorizations')['active']['api_token']
+    token = api_token('active')
     data = "foo\nfile\n"
     datablock = `echo -n #{data.shellescape} | ARVADOS_API_TOKEN=#{token.shellescape} arv-put --no-progress --raw -`.strip
     assert $?.success?, $?
index bfed03b14bd223085dd7fa704eb8494c951ec198..bf48d88cf3f754455d9902727c3b91f4806672d1 100644 (file)
@@ -63,7 +63,7 @@ class JobsTest < ActionDispatch::IntegrationTest
   test 'view log via keep-web redirect' do
     use_keep_web_config
 
-    token = api_fixture('api_client_authorizations')['active']['api_token']
+    token = api_token('active')
     logdata = fakepipe_with_log_data.read
     logblock = `echo -n #{logdata.shellescape} | ARVADOS_API_TOKEN=#{token.shellescape} arv-put --no-progress --raw -`.strip
     assert $?.success?, $?
index 33e50087e77d127e9c30991860b92315aade3d33..5fbdd5c6f010c010b0a85c9eeb1c77b9492173a2 100644 (file)
@@ -244,7 +244,7 @@ class ActionDispatch::IntegrationTest
 end
 
 def upload_data_and_get_collection(data, user, filename, owner_uuid=nil)
-  token = api_fixture('api_client_authorizations')[user]['api_token']
+  token = api_token(user)
   datablock = `echo -n #{data.shellescape} | ARVADOS_API_TOKEN=#{token.shellescape} arv-put --no-progress --raw -`.strip
   assert $?.success?, $?
   col = nil
index 2fd926ff18d6d6f555927df43d6764e9dbea3099..8435eb4b7c6331f54483a3b363d1af67f1c7521b 100644 (file)
@@ -39,7 +39,7 @@ class ActiveSupport::TestCase
     user_was = Thread.current[:user]
     token_was = Thread.current[:arvados_api_token]
     auth = api_fixture('api_client_authorizations')[token_name.to_s]
-    Thread.current[:arvados_api_token] = auth['api_token']
+    Thread.current[:arvados_api_token] = "v2/#{auth['uuid']}/#{auth['api_token']}"
     if block_given?
       begin
         yield
@@ -92,10 +92,16 @@ module ApiFixtureLoader
       keys.inject(@@api_fixtures[name]) { |hash, key| hash[key] }.deep_dup
     end
   end
+
   def api_fixture(name, *keys)
     self.class.api_fixture(name, *keys)
   end
 
+  def api_token(name)
+    auth = api_fixture('api_client_authorizations')[name]
+    "v2/#{auth['uuid']}/#{auth['api_token']}"
+  end
+
   def find_fixture(object_class, name)
     object_class.find(api_fixture(object_class.to_s.pluralize.underscore,
                                   name, "uuid"))
@@ -146,8 +152,9 @@ end
 class ActiveSupport::TestCase
   include ApiFixtureLoader
   def session_for api_client_auth_name
+    auth = api_fixture('api_client_authorizations')[api_client_auth_name.to_s]
     {
-      arvados_api_token: api_fixture('api_client_authorizations')[api_client_auth_name.to_s]['api_token']
+      arvados_api_token: "v2/#{auth['uuid']}/#{auth['api_token']}"
     }
   end
   def json_response
@@ -302,7 +309,7 @@ class ActiveSupport::TestCase
     return unless Rails.env == 'test'
 
     auth = api_fixture('api_client_authorizations')['admin_trustedclient']
-    Thread.current[:arvados_api_token] = auth['api_token']
+    Thread.current[:arvados_api_token] = "v2/#{auth['uuid']}/#{auth['api_token']}"
     ArvadosApiClient.new.api(nil, '../../database/reset', {})
     Thread.current[:arvados_api_token] = nil
   end
index 0dc38f1e726f2e1dfbf0f1645c389f964920ee14..5cf854b52072e0ecbdf5780d3973dcee7b485b61 100644 (file)
@@ -9,7 +9,7 @@ gem 'responders', '~> 2.0'
 gem 'protected_attributes'
 
 group :test, :development do
-  gem 'factory_girl_rails'
+  gem 'factory_bot_rails'
   gem 'database_cleaner'
   gem 'ruby-prof'
   # Note: "require: false" here tells bunder not to automatically
index f935f2c0723a64a99bce17cef26679862931672e..3cf79b2d8f32d73064e27c2d831f5f6dde6baafd 100644 (file)
@@ -93,10 +93,10 @@ GEM
     eventmachine (1.2.6)
     execjs (2.7.0)
     extlib (0.9.16)
-    factory_girl (4.9.0)
+    factory_bot (4.11.1)
       activesupport (>= 3.0.0)
-    factory_girl_rails (4.9.0)
-      factory_girl (~> 4.9.0)
+    factory_bot_rails (4.11.1)
+      factory_bot (~> 4.11.1)
       railties (>= 3.0.0)
     faraday (0.12.2)
       multipart-post (>= 1.2, < 3)
@@ -294,7 +294,7 @@ DEPENDENCIES
   arvados-cli
   coffee-rails (~> 4.0)
   database_cleaner
-  factory_girl_rails
+  factory_bot_rails
   faye-websocket
   httpclient
   jquery-rails
index 8589b3c7a55f366fb5668884d71aac40139bea7f..a0555d13d762a495d9e30a57347ed3336f0e0984 100644 (file)
@@ -344,13 +344,20 @@ class ApplicationController < ActionController::Base
     # If there are too many reader tokens, assume the request is malicious
     # and ignore it.
     if request.get? and params[:reader_tokens] and
-        params[:reader_tokens].size < 100
+      params[:reader_tokens].size < 100
+      secrets = params[:reader_tokens].map { |t|
+        if t.is_a? String and t.starts_with? "v2/"
+          t.split("/")[2]
+        else
+          t
+        end
+      }
       @read_auths += ApiClientAuthorization
         .includes(:user)
         .where('api_token IN (?) AND
                 (expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)',
-               params[:reader_tokens])
-        .all
+               secrets)
+        .to_a
     end
     @read_auths.select! { |auth| auth.scopes_allow_request? request }
     @read_users = @read_auths.map(&:user).uniq
index b8fe2948923582ad9f40f3ec00c394cd6b2473ec..020dfa53b83a6ba645a79a1696b84968144cc0cd 100644 (file)
@@ -159,7 +159,7 @@ class UserSessionsController < ApplicationController
     else
       callback_url += '?'
     end
-    callback_url += 'api_token=' + api_client_auth.api_token
+    callback_url += 'api_token=' + api_client_auth.token
     redirect_to callback_url
   end
 
index 55f795702ac0b00df5476e079554f9fbcf019409..ee2017bf72f8553f406120c40cdd5f174dfedb26 100644 (file)
@@ -2,9 +2,9 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-FactoryGirl.define do
+FactoryBot.define do
   factory :api_client do
-    is_trusted false
+    is_trusted { false }
     to_create do |instance|
       CurrentApiClientHelper.act_as_system_user do
         instance.save!
index f5e3ab4ccb1f17537f6d4f3075a791555861bdab..af2660a5ff816df9121a83326a3cafd5e440c2c7 100644 (file)
@@ -2,10 +2,10 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-FactoryGirl.define do
+FactoryBot.define do
   factory :api_client_authorization do
     api_client
-    scopes ['all']
+    scopes { ['all'] }
 
     trait :trusted do
       association :api_client, factory: :api_client, is_trusted: true
index dcff783185839c7384e93432edf15359c0961362..7af57354b7eefa1c035b02768872cb2a5fec4d04 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-FactoryGirl.define do
+FactoryBot.define do
   factory :group do
   end
 end
index 00f463d37008c105f0ceb4f919f00a1cfdcfe5bd..291a04e0c78a63f5882b3b8991e86ea15aea8863 100644 (file)
@@ -2,10 +2,10 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-FactoryGirl.define do
+FactoryBot.define do
   factory :link do
     factory :permission_link do
-      link_class 'permission'
+      link_class { 'permission' }
     end
   end
 end
index 344e571ef70bc12f3fbf331ef89c7170490b485c..91d939516485328ba492717b55d60ba9de216793 100644 (file)
@@ -6,10 +6,10 @@ class CurrentApiClientHelper
   extend CurrentApiClient
 end
 
-FactoryGirl.define do
+FactoryBot.define do
   factory :user do
     transient do
-      join_groups []
+      join_groups { [] }
     end
     after :create do |user, evaluator|
       CurrentApiClientHelper.act_as_system_user do
@@ -25,13 +25,13 @@ FactoryGirl.define do
         end
       end
     end
-    first_name "Factory"
-    last_name "Factory"
+    first_name { "Factory" }
+    last_name { "Factory" }
     identity_url do
       "https://example.com/#{rand(2**24).to_s(36)}"
     end
     factory :active_user do
-      is_active true
+      is_active { true }
       after :create do |user|
         CurrentApiClientHelper.act_as_system_user do
           Link.create!(tail_uuid: user.uuid,
index 73b45f95ec71a7b28564c8a5767eb48503ec5465..ffd50d808072887884caabffab08252aa2d71716 100644 (file)
@@ -33,7 +33,7 @@ end
 require File.expand_path('../../config/environment', __FILE__)
 require 'rails/test_help'
 require 'mocha'
-require 'mocha/mini_test'
+require 'mocha/minitest'
 
 module ArvadosTestSupport
   def json_response
@@ -41,11 +41,11 @@ module ArvadosTestSupport
   end
 
   def api_token(api_client_auth_name)
-    api_client_authorizations(api_client_auth_name).api_token
+    api_client_authorizations(api_client_auth_name).token
   end
 
   def auth(api_client_auth_name)
-    {'HTTP_AUTHORIZATION' => "OAuth2 #{api_token(api_client_auth_name)}"}
+    {'HTTP_AUTHORIZATION' => "Bearer #{api_token(api_client_auth_name)}"}
   end
 
   def show_errors model
@@ -54,7 +54,7 @@ module ArvadosTestSupport
 end
 
 class ActiveSupport::TestCase
-  include FactoryGirl::Syntax::Methods
+  include FactoryBot::Syntax::Methods
   fixtures :all
 
   include ArvadosTestSupport
@@ -119,14 +119,14 @@ class ActiveSupport::TestCase
   end
 
   def authorize_with api_client_auth_name
-    authorize_with_token api_client_authorizations(api_client_auth_name).api_token
+    authorize_with_token api_client_authorizations(api_client_auth_name).token
   end
 
   def authorize_with_token token
     t = token
-    t = t.api_token if t.respond_to? :api_token
+    t = t.token if t.respond_to? :token
     ArvadosApiToken.new.call("rack.input" => "",
-                             "HTTP_AUTHORIZATION" => "OAuth2 #{t}")
+                             "HTTP_AUTHORIZATION" => "Bearer #{t}")
   end
 
   def salt_token(fixture:, remote:)
index 695190a238052c403c45344b98ef8f74eabad289..88d832f0e837351ab79b5571163bd14a900e2429 100755 (executable)
@@ -10,6 +10,8 @@ flock /var/lib/arvados/createusers.lock /usr/local/lib/arvbox/createusers.sh
 
 export HOME=/var/lib/arvados
 
+chown arvbox /dev/stderr
+
 if test -z "$1" ; then
     exec chpst -u arvbox:arvbox:docker $0-service
 else
index 98dda673d5a3ab70d65ab1d3989b49f539959b69..311c006c07d882a40ee5af8eaae651ba1e3c7145 100644 (file)
@@ -5,9 +5,9 @@
 from __future__ import print_function
 
 import arvados
+import itertools
 import Queue
 import threading
-import _strptime
 
 from crunchstat_summary import logger
 
@@ -16,7 +16,7 @@ class CollectionReader(object):
     def __init__(self, collection_id):
         self._collection_id = collection_id
         self._label = collection_id
-        self._reader = None
+        self._readers = []
 
     def __str__(self):
         return self._label
@@ -25,21 +25,25 @@ class CollectionReader(object):
         logger.debug('load collection %s', self._collection_id)
         collection = arvados.collection.CollectionReader(self._collection_id)
         filenames = [filename for filename in collection]
-        if len(filenames) == 1:
-            filename = filenames[0]
-        else:
-            filename = 'crunchstat.txt'
-        self._label = "{}/{}".format(self._collection_id, filename)
-        self._reader = collection.open(filename)
-        return iter(self._reader)
+        # Crunch2 has multiple stats files
+        if len(filenames) > 1:
+            filenames = ['crunchstat.txt', 'arv-mount.txt']
+        for filename in filenames:
+            try:
+                self._readers.append(collection.open(filename))
+            except IOError:
+                logger.warning('Unable to open %s', filename)
+        self._label = "{}/{}".format(self._collection_id, filenames[0])
+        return itertools.chain(*[iter(reader) for reader in self._readers])
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        if self._reader:
-            self._reader.close()
-            self._reader = None
+        if self._readers:
+            for reader in self._readers:
+                reader.close()
+            self._readers = []
 
 
 class LiveLogReader(object):
index d91161c70c3aabdaa6223063946686f437adeaa0..b2f6f1bb700b6d5d2a04f0212c699eb1ace15435 100644 (file)
@@ -37,6 +37,7 @@ WEBCHART_CLASS = crunchstat_summary.dygraphs.DygraphsChart
 class Task(object):
     def __init__(self):
         self.starttime = None
+        self.finishtime = None
         self.series = collections.defaultdict(list)
 
 
@@ -115,12 +116,14 @@ class Summarizer(object):
                     logger.debug('%s: done %s', self.label, uuid)
                     continue
 
-                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr crunchstat: (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+                # 2017-12-02_17:15:08 e51c5-8i9sb-mfp68stkxnqdd6m 63676 0 stderr crunchstat: keepcalls 0 put 2576 get -- interval 10.0000 seconds 0 put 2576 get
+                m = re.search(r'^(?P<timestamp>[^\s.]+)(\.\d+)? (?P<job_uuid>\S+) \d+ (?P<seq>\d+) stderr (?P<crunchstat>crunchstat: )(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                 if not m:
                     continue
             else:
                 # crunch2
-                m = re.search(r'^(?P<timestamp>\S+) (?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
+                # 2017-12-01T16:56:24.723509200Z crunchstat: keepcalls 0 put 3 get -- interval 10.0000 seconds 0 put 3 get
+                m = re.search(r'^(?P<timestamp>\S+) (?P<crunchstat>crunchstat: )?(?P<category>\S+) (?P<current>.*?)( -- interval (?P<interval>.*))?\n$', line)
                 if not m:
                     continue
 
@@ -158,15 +161,24 @@ class Summarizer(object):
                 raise ValueError("Cannot parse timestamp {!r}".format(
                     timestamp))
 
-            if not task.starttime:
-                task.starttime = timestamp
+            if task.starttime is None:
                 logger.debug('%s: task %s starttime %s',
                              self.label, task_id, timestamp)
-            task.finishtime = timestamp
+            if task.starttime is None or timestamp < task.starttime:
+                task.starttime = timestamp
+            if task.finishtime is None or timestamp > task.finishtime:
+                task.finishtime = timestamp
 
-            if not self.starttime:
+            if self.starttime is None or timestamp < self.starttime:
                 self.starttime = timestamp
-            self.finishtime = timestamp
+            if self.finishtime is None or timestamp > self.finishtime:
+                self.finishtime = timestamp
+
+            if (not self.detected_crunch1) and task.starttime is not None and task.finishtime is not None:
+                elapsed = int((task.finishtime - task.starttime).total_seconds())
+                self.task_stats[task_id]['time'] = {'elapsed': elapsed}
+                if elapsed > self.stats_max['time']['elapsed']:
+                    self.stats_max['time']['elapsed'] = elapsed
 
             this_interval_s = None
             for group in ['current', 'interval']:
@@ -182,10 +194,16 @@ class Summarizer(object):
                         else:
                             stats[stat] = int(val)
                 except ValueError as e:
-                    logger.warning(
-                        'Error parsing value %r (stat %r, category %r): %r',
-                        val, stat, category, e)
-                    logger.warning('%s', line)
+                    # If the line doesn't start with 'crunchstat:' we
+                    # might have mistaken an error message for a
+                    # structured crunchstat line.
+                    if m.group("crunchstat") is None or m.group("category") == "crunchstat":
+                        logger.warning("%s: log contains message\n  %s", self.label, line)
+                    else:
+                        logger.warning(
+                            '%s: Error parsing value %r (stat %r, category %r): %r',
+                            self.label, val, stat, category, e)
+                        logger.warning('%s', line)
                     continue
                 if 'user' in stats or 'sys' in stats:
                     stats['user+sys'] = stats.get('user', 0) + stats.get('sys', 0)
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz
new file mode 100644 (file)
index 0000000..ff7dd30
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz.report
new file mode 100644 (file)
index 0000000..9819461
--- /dev/null
@@ -0,0 +1,24 @@
+category       metric  task_max        task_max_rate   job_total
+blkio:0:0      read    0       0       0
+blkio:0:0      write   0       0       0
+fuseops        read    0       0       0
+fuseops        write   0       0       0
+keepcache      hit     0       0       0
+keepcache      miss    0       0       0
+keepcalls      get     0       0       0
+keepcalls      put     0       0       0
+net:keep0      rx      0       0       0
+net:keep0      tx      0       0       0
+net:keep0      tx+rx   0       0       0
+time   elapsed 10      -       10
+# Number of tasks: 1
+# Max CPU time spent by a single task: 0s
+# Max CPU usage in a single interval: 0%
+# Overall CPU usage: 0%
+# Max memory used by a single task: 0.00GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+#!! container max CPU usage was 0% -- try runtime_constraints "vcpus":1
+#!! container max RSS was 0 MiB -- try runtime_constraints "ram":0
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz
new file mode 100644 (file)
index 0000000..249ad22
Binary files /dev/null and b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz.report
new file mode 100644 (file)
index 0000000..b61da15
--- /dev/null
@@ -0,0 +1,27 @@
+category       metric  task_max        task_max_rate   job_total
+cpu    cpus    20      -       -
+cpu    sys     0.39    0.04    0.39
+cpu    user    2.06    0.20    2.06
+cpu    user+sys        2.45    0.24    2.45
+mem    cache   172032  -       -
+mem    pgmajfault      0       -       0
+mem    rss     69525504        -       -
+mem    swap    0       -       -
+net:eth0       rx      859480  1478.97 859480
+net:eth0       tx      55888   395.71  55888
+net:eth0       tx+rx   915368  1874.69 915368
+statfs available       397744787456    -       397744787456
+statfs total   402611240960    -       402611240960
+statfs used    4870303744      52426.18        4866453504
+time   elapsed 20      -       20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+#!! container max CPU usage was 24% -- try runtime_constraints "vcpus":1
+#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-lymtndkpy39eibk.txt.gz.report
new file mode 100644 (file)
index 0000000..9d3cd78
--- /dev/null
@@ -0,0 +1,38 @@
+category       metric  task_max        task_max_rate   job_total
+blkio:0:0      read    0       0       0
+blkio:0:0      write   0       0       0
+cpu    cpus    20      -       -
+cpu    sys     0.39    0.04    0.39
+cpu    user    2.06    0.20    2.06
+cpu    user+sys        2.45    0.24    2.45
+fuseops        read    0       0       0
+fuseops        write   0       0       0
+keepcache      hit     0       0       0
+keepcache      miss    0       0       0
+keepcalls      get     0       0       0
+keepcalls      put     0       0       0
+mem    cache   172032  -       -
+mem    pgmajfault      0       -       0
+mem    rss     69525504        -       -
+mem    swap    0       -       -
+net:eth0       rx      859480  1478.97 859480
+net:eth0       tx      55888   395.71  55888
+net:eth0       tx+rx   915368  1874.69 915368
+net:keep0      rx      0       0       0
+net:keep0      tx      0       0       0
+net:keep0      tx+rx   0       0       0
+statfs available       397744787456    -       397744787456
+statfs total   402611240960    -       402611240960
+statfs used    4870303744      52426.18        4866453504
+time   elapsed 20      -       20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+#!! container max CPU usage was 24% -- try runtime_constraints "vcpus":1
+#!! container max RSS was 67 MiB -- try runtime_constraints "ram":1020054732
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz b/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz
deleted file mode 100644 (file)
index 8b069e7..0000000
Binary files a/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz and /dev/null differ
diff --git a/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz.report b/tools/crunchstat-summary/tests/container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz.report
deleted file mode 100644 (file)
index 88e06a3..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-category       metric  task_max        task_max_rate   job_total
-cpu    cpus    20      -       -
-cpu    sys     0.82    0.08    0.82
-cpu    user    2.31    0.22    2.31
-cpu    user+sys        3.13    0.30    3.13
-mem    cache   23846912        -       -
-mem    pgmajfault      121     -       121
-mem    rss     65470464        -       -
-mem    swap    0       -       -
-net:eth0       rx      500762  951.15  500762
-net:eth0       tx      36242   226.61  36242
-net:eth0       tx+rx   537004  1177.76 537004
-# Number of tasks: 1
-# Max CPU time spent by a single task: 3.13s
-# Max CPU usage in a single interval: 29.89%
-# Overall CPU usage: 0%
-# Max memory used by a single task: 0.07GB
-# Max network traffic in a single task: 0.00GB
-# Max network speed in a single interval: 0.00MB/s
-# Keep cache miss rate 0.00%
-# Keep cache utilization 0.00%
-#!! container max CPU usage was 30% -- try runtime_constraints "vcpus":1
-#!! container max RSS was 63 MiB -- try runtime_constraints "ram":1020054732
index 6271f5665c040cd45d203cf4e590b56d19d97f86..af92becd80a6875d64e1d406d2b21f8bfbd6ec57 100644 (file)
@@ -61,7 +61,7 @@ class SummarizeEdgeCases(unittest.TestCase):
 
 class SummarizeContainer(ReportDiff):
     fake_container = {
-        'uuid': '9tee4-dz642-mjfb0i5hzojp16a',
+        'uuid': '9tee4-dz642-lymtndkpy39eibk',
         'created_at': '2017-08-18T14:27:25.371388141',
         'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
     }
@@ -71,8 +71,12 @@ class SummarizeContainer(ReportDiff):
         'created_at': '2017-08-18T14:27:25.242339223Z',
         'container_uuid': fake_container['uuid'],
     }
+    reportfile = os.path.join(
+        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
     logfile = os.path.join(
-        TESTS_DIR, 'container_9tee4-dz642-mjfb0i5hzojp16a-crunchstat.txt.gz')
+        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+    arvmountlog = os.path.join(
+        TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
 
     @mock.patch('arvados.collection.CollectionReader')
     @mock.patch('arvados.api')
@@ -82,13 +86,18 @@ class SummarizeContainer(ReportDiff):
         mock_api().containers().get().execute.return_value = self.fake_container
         mock_cr().__iter__.return_value = [
             'crunch-run.txt', 'stderr.txt', 'node-info.txt',
-            'container.json', 'crunchstat.txt']
-        mock_cr().open.return_value = gzip.open(self.logfile)
+            'container.json', 'crunchstat.txt', 'arv-mount.txt']
+        def _open(n):
+            if n == "crunchstat.txt":
+                return gzip.open(self.logfile)
+            elif n == "arv-mount.txt":
+                return gzip.open(self.arvmountlog)
+        mock_cr().open.side_effect = _open
         args = crunchstat_summary.command.ArgumentParser().parse_args(
             ['--job', self.fake_request['uuid']])
         cmd = crunchstat_summary.command.Command(args)
         cmd.run()
-        self.diff_known_report(self.logfile, cmd)
+        self.diff_known_report(self.reportfile, cmd)
 
 
 class SummarizeJob(ReportDiff):