From: Peter Amstutz Date: Thu, 17 Sep 2020 13:57:15 +0000 (-0400) Subject: Merge branch '16601-new-tutorial' refs #16601 X-Git-Tag: 2.1.0~59 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/a0398ebd1c50b1be2433c109af6bb0d263c54ea5?hp=65ed3963aabde8a25b1c4c0bb9ca858edf76c277 Merge branch '16601-new-tutorial' refs #16601 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/build/run-library.sh b/build/run-library.sh index 528d69d998..df551455c1 100755 --- a/build/run-library.sh +++ b/build/run-library.sh @@ -704,6 +704,8 @@ fpm_build_virtualenv () { COMMAND_ARR+=(".") + debug_echo -e "\n${COMMAND_ARR[@]}\n" + FPM_RESULTS=$("${COMMAND_ARR[@]}") FPM_EXIT_CODE=$? @@ -827,13 +829,13 @@ fpm_build () { COMMAND_ARR+=('--exclude' "$i") done + COMMAND_ARR+=("${fpm_args[@]}") + # Append remaining function arguments directly to fpm's command line. for i; do COMMAND_ARR+=("$i") done - COMMAND_ARR+=("${fpm_args[@]}") - COMMAND_ARR+=("$PACKAGE") debug_echo -e "\n${COMMAND_ARR[@]}\n" diff --git a/build/run-tests.sh b/build/run-tests.sh index 6359fff1de..32d4a75d2b 100755 --- a/build/run-tests.sh +++ b/build/run-tests.sh @@ -650,6 +650,7 @@ install_env() { . "$VENV3DIR/bin/activate" # Needed for run_test_server.py which is used by certain (non-Python) tests. + # pdoc3 needed to generate the Python SDK documentation. ( set -e "${VENV3DIR}/bin/pip3" install wheel @@ -660,6 +661,7 @@ install_env() { "${VENV3DIR}/bin/pip3" install ciso8601 "${VENV3DIR}/bin/pip3" install pycurl "${VENV3DIR}/bin/pip3" install ws4py + "${VENV3DIR}/bin/pip3" install pdoc3 cd "$WORKSPACE/sdk/python" python3 setup.py install ) || fatal "installing PyYAML and sdk/python failed" diff --git a/doc/Gemfile.lock b/doc/Gemfile.lock index 344a0a86b5..b5e62cacd6 100644 --- a/doc/Gemfile.lock +++ b/doc/Gemfile.lock @@ -1,28 +1,23 @@ GEM remote: https://rubygems.org/ specs: - RedCloth (4.2.9) - coderay (1.1.0) - colorize (0.6.0) - kramdown (1.3.1) - less (1.2.21) - mutter (>= 0.4.2) - treetop (>= 1.4.2) - liquid (2.6.1) - makerakeworkwell (1.0.3) - rake (>= 0.9.2, < 11) - mutter (0.5.3) - polyglot (0.3.3) - rake (10.1.1) - treetop (1.4.15) - polyglot - polyglot (>= 0.3.1) - zenweb (3.3.1) + RedCloth (4.3.2) + coderay (1.1.3) + colorize (0.8.1) + commonjs (0.2.7) + kramdown (1.17.0) + less (2.6.0) + commonjs (~> 0.2.7) + liquid (4.0.3) + makerakeworkwell (1.0.4) + rake (>= 0.9.2, < 15) + rake (13.0.1) + zenweb (3.10.4) coderay (~> 1.0) - kramdown (~> 1.0) - less (~> 1.2) + kramdown (~> 1.4) + less (~> 2.0) makerakeworkwell (~> 1.0) - rake (>= 0.9, < 11) + rake (>= 0.9, < 15) PLATFORMS ruby @@ -32,3 +27,6 @@ DEPENDENCIES colorize liquid zenweb + +BUNDLED WITH + 2.1.4 diff --git a/doc/README.textile b/doc/README.textile index 75a30e9ef2..85757980a7 100644 --- a/doc/README.textile +++ b/doc/README.textile @@ -13,20 +13,28 @@ Additional information is available on the "'Documentation' page on the Arvados h2. Install dependencies
+arvados/doc$ sudo apt-get install build-essential libcurl4-openssl-dev libgnutls28-dev libssl-dev
 arvados/doc$ bundle install
-arvados/doc$ pip install epydoc
+
+
+To generate the Python SDK documentation, these additional dependencies are needed:
+
+
+arvados/doc$ sudo apt-get install python3-pip
+arvados/doc$ pip3 install arvados-python-client
+arvados/doc$ pip3 install pdoc3
 
h2. Generate HTML pages
-arvados/doc$ rake
+arvados/doc$ bundle exec rake
 
Alternatively, to make the documentation browsable on the local filesystem:
-arvados/doc$ rake generate baseurl=$PWD/.site
+arvados/doc$ bundle exec rake generate baseurl=$PWD/.site
 
h2. Run linkchecker

@@ -35,7 +43,7 @@
If you have "Linkchecker":http://wummel.github.io/linkchecker/ installed on your system, you can run it against the documentation:
-arvados/doc$ rake linkchecker baseurl=file://$PWD/.site
+arvados/doc$ bundle exec rake linkchecker baseurl=file://$PWD/.site
 
Please note that this will regenerate your $PWD/.site directory.

@@ -43,7 +51,7 @@ Please note that this will regenerate your $PWD/.site directory.

h2. Preview HTML pages
-arvados/doc$ rake run
+arvados/doc$ bundle exec rake run
 [2014-03-10 09:03:41] INFO  WEBrick 1.3.1
 [2014-03-10 09:03:41] INFO  ruby 2.1.1 (2014-02-24) [x86_64-linux]
 [2014-03-10 09:03:41] INFO  WEBrick::HTTPServer#start: pid=8926 port=8000
@@ -58,7 +66,7 @@ h2. Publish HTML pages inside Workbench
 You can set @baseurl@ (the URL prefix for all internal links), @arvados_cluster_uuid@, @arvados_api_host@ and @arvados_workbench_host@ without changing @_config.yml@:
 
 
-arvados/doc$ rake generate baseurl=/doc arvados_api_host=xyzzy.arvadosapi.com
+arvados/doc$ bundle exec rake generate baseurl=/doc arvados_api_host=xyzzy.arvadosapi.com
 
Make the docs appear at {workbench_host}/doc by creating a symbolic link in Workbench's @public@ directory, pointing to the generated HTML tree.

@@ -70,5 +78,5 @@ arvados/doc$ ln -sn ../../../doc/.site ../apps/workbench/public/doc

h2. Delete generated files
-arvados/doc$ rake realclean
+arvados/doc$ bundle exec rake realclean
 
diff --git a/doc/Rakefile b/doc/Rakefile index 623dbd033b..f7050dc41f 100644 --- a/doc/Rakefile +++ b/doc/Rakefile @@ -35,12 +35,12 @@ file "sdk/python/arvados/index.html" do |t| if ENV['NO_SDK'] || File.exists?("no-sdk") next end - `which epydoc` + `which pdoc` if $? == 0 - STDERR.puts `epydoc --html --parse-only -o sdk/python/arvados ../sdk/python/arvados/ 2>&1` + STDERR.puts `pdoc --html -o sdk/python ../sdk/python/arvados/ 2>&1` raise if $? != 0 else - puts "Warning: epydoc not found, Python documentation will not be generated".colorize(:light_red) + puts "Warning: pdoc3 not found, Python documentation will not be generated".colorize(:light_red) end end diff --git a/lib/config/cmd.go b/lib/config/cmd.go index 1ea0883ac8..347e8519a9 100644 --- a/lib/config/cmd.go +++ b/lib/config/cmd.go @@ -91,6 +91,7 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo flags := flag.NewFlagSet("", flag.ContinueOnError) flags.SetOutput(stderr) loader.SetupFlags(flags) + strict := flags.Bool("strict", true, "Strict validation of configuration file (warnings result in non-zero exit code)") err = flags.Parse(args) if err == flag.ErrHelp { @@ -148,15 +149,21 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo fmt.Fprintln(stdout, "Your configuration is relying on deprecated entries. Suggest making the following changes.") stdout.Write(diff) err = nil - return 1 + if *strict { + return 1 + } } else if len(diff) > 0 { fmt.Fprintf(stderr, "Unexpected diff output:\n%s", diff) - return 1 + if *strict { + return 1 + } } else if err != nil { return 1 } if logbuf.Len() > 0 { - return 1 + if *strict { + return 1 + } } if problems { diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index b1865a2217..15e7c7c06c 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -938,6 +938,11 @@ Clusters: # Time before repeating SIGTERM when killing a container. TimeoutSignal: 5s + # Time to give up on a process (most likely arv-mount) that + # still holds a container lockfile after its main supervisor + # process has exited, and declare the instance broken. + TimeoutStaleRunLock: 5s + # Time to give up on SIGTERM and write off the worker. TimeoutTERM: 2m diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 201ae36045..7ed332151b 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -944,6 +944,11 @@ Clusters: # Time before repeating SIGTERM when killing a container. TimeoutSignal: 5s + # Time to give up on a process (most likely arv-mount) that + # still holds a container lockfile after its main supervisor + # process has exited, and declare the instance broken. + TimeoutStaleRunLock: 5s + # Time to give up on SIGTERM and write off the worker. TimeoutTERM: 2m diff --git a/lib/crunchrun/background.go b/lib/crunchrun/background.go index bf039afa0a..8cdba72c10 100644 --- a/lib/crunchrun/background.go +++ b/lib/crunchrun/background.go @@ -218,6 +218,24 @@ func ListProcesses(stdout, stderr io.Writer) int { return nil } + proc, err := os.FindProcess(pi.PID) + if err != nil { + // FindProcess should have succeeded, even if the + // process does not exist. + fmt.Fprintf(stderr, "%s: find process %d: %s", path, pi.PID, err) + return nil + } + err = proc.Signal(syscall.Signal(0)) + if err != nil { + // Process is dead, even though lockfile was + // still locked. Most likely a stuck arv-mount + // process that inherited the lock from + // crunch-run. 
Report container UUID as + // "stale". + fmt.Fprintln(stdout, pi.UUID, "stale") + return nil + } + fmt.Fprintln(stdout, pi.UUID) return nil })) diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go index 45b346383f..a1ff414b73 100644 --- a/lib/dispatchcloud/container/queue.go +++ b/lib/dispatchcloud/container/queue.go @@ -382,7 +382,7 @@ func (cq *Queue) poll() (map[string]*arvados.Container, error) { *next[upd.UUID] = upd } } - selectParam := []string{"uuid", "state", "priority", "runtime_constraints", "container_image", "mounts", "scheduling_parameters"} + selectParam := []string{"uuid", "state", "priority", "runtime_constraints", "container_image", "mounts", "scheduling_parameters", "created_at"} limitParam := 1000 mine, err := cq.fetchAll(arvados.ResourceListParams{ diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go index 02b6c976ae..278bcb6657 100644 --- a/lib/dispatchcloud/dispatcher.go +++ b/lib/dispatchcloud/dispatcher.go @@ -181,7 +181,7 @@ func (disp *dispatcher) run() { if pollInterval <= 0 { pollInterval = defaultPollInterval } - sched := scheduler.New(disp.Context, disp.queue, disp.pool, staleLockTimeout, pollInterval) + sched := scheduler.New(disp.Context, disp.queue, disp.pool, disp.Registry, staleLockTimeout, pollInterval) sched.Start() defer sched.Stop() diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go index 6e1850410b..9f1eb098e0 100644 --- a/lib/dispatchcloud/dispatcher_test.go +++ b/lib/dispatchcloud/dispatcher_test.go @@ -66,6 +66,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) { ProbeInterval: arvados.Duration(5 * time.Millisecond), MaxProbesPerSecond: 1000, TimeoutSignal: arvados.Duration(3 * time.Millisecond), + TimeoutStaleRunLock: arvados.Duration(3 * time.Millisecond), TimeoutTERM: arvados.Duration(20 * time.Millisecond), ResourceTags: map[string]string{"testtag": "test value"}, TagKeyPrefix: "test:", @@ -169,6 +170,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { stubvm.ReportBroken = time.Now().Add(time.Duration(rand.Int63n(200)) * time.Millisecond) default: stubvm.CrunchRunCrashRate = 0.1 + stubvm.ArvMountDeadlockRate = 0.1 } } s.stubDriver.Bugf = c.Errorf @@ -221,6 +223,10 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds{quantile="0.95"} [0-9.]*`) c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds_count [0-9]*`) c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds_sum [0-9.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_count [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_sum [0-9.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_queue_to_crunch_run_seconds_count [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_queue_to_crunch_run_seconds_sum [0-9e+.]*`) } func (s *DispatcherSuite) TestAPIPermissions(c *check.C) { diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go index 0e8e1dc2ec..b9d653a821 100644 --- a/lib/dispatchcloud/scheduler/run_queue.go +++ b/lib/dispatchcloud/scheduler/run_queue.go @@ -33,6 +33,7 @@ func (sch *Scheduler) runQueue() { dontstart := map[arvados.InstanceType]bool{} var overquota []container.QueueEnt // entries that are 
unmappable because of worker pool quota + var containerAllocatedWorkerBootingCount int tryrun: for i, ctr := range sorted { @@ -92,11 +93,15 @@ tryrun: } else if sch.pool.StartContainer(it, ctr) { // Success. } else { + containerAllocatedWorkerBootingCount += 1 dontstart[it] = true } } } + sch.mContainersAllocatedNotStarted.Set(float64(containerAllocatedWorkerBootingCount)) + sch.mContainersNotAllocatedOverQuota.Set(float64(len(overquota))) + if len(overquota) > 0 { // Unlock any containers that are unmappable while // we're at quota. diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go index 530eb5db93..fd1d0a870b 100644 --- a/lib/dispatchcloud/scheduler/run_queue_test.go +++ b/lib/dispatchcloud/scheduler/run_queue_test.go @@ -13,6 +13,9 @@ import ( "git.arvados.org/arvados.git/lib/dispatchcloud/worker" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" + + "github.com/prometheus/client_golang/prometheus/testutil" + check "gopkg.in/check.v1" ) @@ -185,7 +188,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) { running: map[string]time.Time{}, canCreate: 0, } - New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue() + New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue() c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1), test.InstanceType(1), test.InstanceType(1)}) c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)}) c.Check(pool.running, check.HasLen, 1) @@ -241,7 +244,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) { starts: []string{}, canCreate: 0, } - New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue() + New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue() c.Check(pool.creates, check.DeepEquals, shouldCreate) if len(shouldCreate) == 0 { c.Check(pool.starts, check.DeepEquals, []string{}) @@ -336,7 +339,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) { }, } queue.Update() - New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue() + New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue() c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)}) c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]}) running := map[string]bool{} @@ -380,10 +383,87 @@ func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) { }, } queue.Update() - sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond) + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) c.Check(pool.running, check.HasLen, 1) sch.sync() for deadline := time.Now().Add(time.Second); len(pool.Running()) > 0 && time.Now().Before(deadline); time.Sleep(time.Millisecond) { } c.Check(pool.Running(), check.HasLen, 0) } + +func (*SchedulerSuite) TestContainersMetrics(c *check.C) { + ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c)) + queue := test.Queue{ + ChooseType: chooseType, + Containers: []arvados.Container{ + { + UUID: test.ContainerUUID(1), + Priority: 1, + State: arvados.ContainerStateLocked, + CreatedAt: time.Now().Add(-10 * time.Second), + RuntimeConstraints: arvados.RuntimeConstraints{ + VCPUs: 1, + RAM: 1 << 30, + }, + }, + }, + } + queue.Update() + + // Create a pool with one unallocated (idle/booting/unknown) worker, + // and `idle` and `unknown` not set (empty). 
Iow this worker is in the booting + // state, and the container will be allocated but not started yet. + pool := stubPool{ + unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1}, + } + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) + sch.runQueue() + sch.updateMetrics() + + c.Check(int(testutil.ToFloat64(sch.mContainersAllocatedNotStarted)), check.Equals, 1) + c.Check(int(testutil.ToFloat64(sch.mContainersNotAllocatedOverQuota)), check.Equals, 0) + c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 10) + + // Create a pool without workers. The queued container will not be started, and the + // 'over quota' metric will be 1 because no workers are available and canCreate defaults + // to zero. + pool = stubPool{} + sch = New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) + sch.runQueue() + sch.updateMetrics() + + c.Check(int(testutil.ToFloat64(sch.mContainersAllocatedNotStarted)), check.Equals, 0) + c.Check(int(testutil.ToFloat64(sch.mContainersNotAllocatedOverQuota)), check.Equals, 1) + c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 10) + + // Reset the queue, and create a pool with an idle worker. The queued + // container will be started immediately and mLongestWaitTimeSinceQueue + // should be zero. + queue = test.Queue{ + ChooseType: chooseType, + Containers: []arvados.Container{ + { + UUID: test.ContainerUUID(1), + Priority: 1, + State: arvados.ContainerStateLocked, + CreatedAt: time.Now().Add(-10 * time.Second), + RuntimeConstraints: arvados.RuntimeConstraints{ + VCPUs: 1, + RAM: 1 << 30, + }, + }, + }, + } + queue.Update() + + pool = stubPool{ + idle: map[arvados.InstanceType]int{test.InstanceType(1): 1}, + unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1}, + running: map[string]time.Time{}, + } + sch = New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) + sch.runQueue() + sch.updateMetrics() + + c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 0) +} diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go index 6409ea031a..c3e67dd11f 100644 --- a/lib/dispatchcloud/scheduler/scheduler.go +++ b/lib/dispatchcloud/scheduler/scheduler.go @@ -11,7 +11,9 @@ import ( "sync" "time" + "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" ) @@ -31,6 +33,7 @@ type Scheduler struct { logger logrus.FieldLogger queue ContainerQueue pool WorkerPool + reg *prometheus.Registry staleLockTimeout time.Duration queueUpdateInterval time.Duration @@ -41,17 +44,22 @@ type Scheduler struct { runOnce sync.Once stop chan struct{} stopped chan struct{} + + mContainersAllocatedNotStarted prometheus.Gauge + mContainersNotAllocatedOverQuota prometheus.Gauge + mLongestWaitTimeSinceQueue prometheus.Gauge } // New returns a new unstarted Scheduler. // // Any given queue and pool should not be used by more than one // scheduler at a time. 
-func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler { - return &Scheduler{ +func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, reg *prometheus.Registry, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler { + sch := &Scheduler{ logger: ctxlog.FromContext(ctx), queue: queue, pool: pool, + reg: reg, staleLockTimeout: staleLockTimeout, queueUpdateInterval: queueUpdateInterval, wakeup: time.NewTimer(time.Second), @@ -59,6 +67,59 @@ func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTi stopped: make(chan struct{}), uuidOp: map[string]string{}, } + sch.registerMetrics(reg) + return sch +} + +func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) { + if reg == nil { + reg = prometheus.NewRegistry() + } + sch.mContainersAllocatedNotStarted = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_allocated_not_started", + Help: "Number of containers allocated to a worker but not started yet (worker is booting).", + }) + reg.MustRegister(sch.mContainersAllocatedNotStarted) + sch.mContainersNotAllocatedOverQuota = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_not_allocated_over_quota", + Help: "Number of containers not allocated to a worker because the system has hit a quota.", + }) + reg.MustRegister(sch.mContainersNotAllocatedOverQuota) + sch.mLongestWaitTimeSinceQueue = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_longest_wait_time_seconds", + Help: "Current longest wait time of any container since queuing, and before the start of crunch-run.", + }) + reg.MustRegister(sch.mLongestWaitTimeSinceQueue) +} + +func (sch *Scheduler) updateMetrics() { + earliest := time.Time{} + entries, _ := sch.queue.Entries() + running := sch.pool.Running() + for _, ent := range entries { + if ent.Container.Priority > 0 && + (ent.Container.State == arvados.ContainerStateQueued || ent.Container.State == arvados.ContainerStateLocked) { + // Exclude containers that are preparing to run the payload (i.e. + // ContainerStateLocked and running on a worker, most likely loading the + // payload image + if _, ok := running[ent.Container.UUID]; !ok { + if ent.Container.CreatedAt.Before(earliest) || earliest.IsZero() { + earliest = ent.Container.CreatedAt + } + } + } + } + if !earliest.IsZero() { + sch.mLongestWaitTimeSinceQueue.Set(time.Since(earliest).Seconds()) + } else { + sch.mLongestWaitTimeSinceQueue.Set(0) + } } // Start starts the scheduler. 
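(A minimal sketch, not part of this change: the metrics registered above are plain prometheus gauges that updateMetrics() overwrites on every scheduler pass, so each scrape reflects the current queue state instead of an accumulated count. The registry wiring, metric value, and variable names below are illustrative only; testutil.ToFloat64 is the same helper the new tests use.)

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	reg := prometheus.NewRegistry()
	// Same Namespace/Subsystem naming convention as registerMetrics().
	longestWait := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "arvados",
		Subsystem: "dispatchcloud",
		Name:      "containers_longest_wait_time_seconds",
		Help:      "Current longest wait time of any container since queuing.",
	})
	reg.MustRegister(longestWait)

	// Each scheduler pass calls Set() outright (no Inc/Dec), so stale
	// values cannot accumulate when containers leave the queue.
	longestWait.Set(10)
	fmt.Println(testutil.ToFloat64(longestWait)) // prints 10
}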
@@ -113,6 +174,7 @@ func (sch *Scheduler) run() { for { sch.runQueue() sch.sync() + sch.updateMetrics() select { case <-sch.stop: return diff --git a/lib/dispatchcloud/scheduler/sync_test.go b/lib/dispatchcloud/scheduler/sync_test.go index 538f5ea8cf..a3ff0636e1 100644 --- a/lib/dispatchcloud/scheduler/sync_test.go +++ b/lib/dispatchcloud/scheduler/sync_test.go @@ -48,7 +48,7 @@ func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) { ents, _ := queue.Entries() c.Check(ents, check.HasLen, 1) - sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond) + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) sch.sync() ents, _ = queue.Entries() @@ -80,7 +80,7 @@ func (*SchedulerSuite) TestCancelOrphanedContainers(c *check.C) { ents, _ := queue.Entries() c.Check(ents, check.HasLen, 1) - sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond) + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) // Sync shouldn't cancel the container because it might be // running on the VM with state=="unknown". diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go index 132bd4d695..4d32cf221c 100644 --- a/lib/dispatchcloud/test/stub_driver.go +++ b/lib/dispatchcloud/test/stub_driver.go @@ -131,7 +131,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags: copyTags(tags), providerType: it.ProviderType, initCommand: cmd, - running: map[string]int64{}, + running: map[string]stubProcess{}, killing: map[string]bool{}, } svm.SSHService = SSHService{ @@ -189,6 +189,8 @@ type StubVM struct { CrunchRunMissing bool CrunchRunCrashRate float64 CrunchRunDetachDelay time.Duration + ArvMountMaxExitLag time.Duration + ArvMountDeadlockRate float64 ExecuteContainer func(arvados.Container) int CrashRunningContainer func(arvados.Container) @@ -198,12 +200,21 @@ type StubVM struct { initCommand cloud.InitCommand providerType string SSHService SSHService - running map[string]int64 + running map[string]stubProcess killing map[string]bool lastPID int64 + deadlocked string sync.Mutex } +type stubProcess struct { + pid int64 + + // crunch-run has exited, but arv-mount process (or something) + // still holds lock in /var/run/ + exited bool +} + func (svm *StubVM) Instance() stubInstance { svm.Lock() defer svm.Unlock() @@ -256,7 +267,7 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, svm.Lock() svm.lastPID++ pid := svm.lastPID - svm.running[uuid] = pid + svm.running[uuid] = stubProcess{pid: pid} svm.Unlock() time.Sleep(svm.CrunchRunDetachDelay) fmt.Fprintf(stderr, "starting %s\n", uuid) @@ -273,14 +284,13 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, logger.Print("[test] exiting crunch-run stub") svm.Lock() defer svm.Unlock() - if svm.running[uuid] != pid { + if svm.running[uuid].pid != pid { bugf := svm.sis.driver.Bugf if bugf == nil { bugf = logger.Warnf } - bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid]) - } else { - delete(svm.running, uuid) + bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s].pid==%d", pid, uuid, svm.running[uuid].pid) + return } if !completed { logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub") @@ -288,6 +298,15 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, svm.CrashRunningContainer(ctr) } } + sproc := svm.running[uuid] + sproc.exited = true + svm.running[uuid] = 
sproc + svm.Unlock() + time.Sleep(svm.ArvMountMaxExitLag * time.Duration(math_rand.Float64())) + svm.Lock() + if math_rand.Float64() >= svm.ArvMountDeadlockRate { + delete(svm.running, uuid) + } }() crashluck := math_rand.Float64() @@ -333,26 +352,31 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, if command == "crunch-run --list" { svm.Lock() defer svm.Unlock() - for uuid := range svm.running { - fmt.Fprintf(stdout, "%s\n", uuid) + for uuid, sproc := range svm.running { + if sproc.exited { + fmt.Fprintf(stdout, "%s stale\n", uuid) + } else { + fmt.Fprintf(stdout, "%s\n", uuid) + } } if !svm.ReportBroken.IsZero() && svm.ReportBroken.Before(time.Now()) { fmt.Fprintln(stdout, "broken") } + fmt.Fprintln(stdout, svm.deadlocked) return 0 } if strings.HasPrefix(command, "crunch-run --kill ") { svm.Lock() - _, running := svm.running[uuid] - if running { + sproc, running := svm.running[uuid] + if running && !sproc.exited { svm.killing[uuid] = true svm.Unlock() time.Sleep(time.Duration(math_rand.Float64()*2) * time.Millisecond) svm.Lock() - _, running = svm.running[uuid] + sproc, running = svm.running[uuid] } svm.Unlock() - if running { + if running && !sproc.exited { fmt.Fprintf(stderr, "%s: container is running\n", uuid) return 1 } diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go index 086887cb44..c6eaeae2b6 100644 --- a/lib/dispatchcloud/worker/pool.go +++ b/lib/dispatchcloud/worker/pool.go @@ -64,15 +64,16 @@ type Executor interface { } const ( - defaultSyncInterval = time.Minute - defaultProbeInterval = time.Second * 10 - defaultMaxProbesPerSecond = 10 - defaultTimeoutIdle = time.Minute - defaultTimeoutBooting = time.Minute * 10 - defaultTimeoutProbe = time.Minute * 10 - defaultTimeoutShutdown = time.Second * 10 - defaultTimeoutTERM = time.Minute * 2 - defaultTimeoutSignal = time.Second * 5 + defaultSyncInterval = time.Minute + defaultProbeInterval = time.Second * 10 + defaultMaxProbesPerSecond = 10 + defaultTimeoutIdle = time.Minute + defaultTimeoutBooting = time.Minute * 10 + defaultTimeoutProbe = time.Minute * 10 + defaultTimeoutShutdown = time.Second * 10 + defaultTimeoutTERM = time.Minute * 2 + defaultTimeoutSignal = time.Second * 5 + defaultTimeoutStaleRunLock = time.Second * 5 // Time after a quota error to try again anyway, even if no // instances have been shutdown. 
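(A minimal sketch, not part of this change: classifying "crunch-run --list" output. Per ListProcesses above and the worker probe later in this diff, each line is either a container UUID, a UUID followed by " stale" when crunch-run has exited but something, most likely arv-mount, still holds the run lock, or the literal "broken". The classify function and the UUIDs in main are illustrative only.)

package main

import (
	"fmt"
	"strings"
)

// classify splits "crunch-run --list" output into running and stale
// container UUIDs, plus a flag for the "broken" marker line.
func classify(stdout string) (running, stale []string, broken bool) {
	for _, s := range strings.Split(stdout, "\n") {
		if s == "" {
			// empty string following the final newline
		} else if s == "broken" {
			broken = true
		} else if toks := strings.Split(s, " "); len(toks) == 1 {
			running = append(running, s)
		} else if toks[1] == "stale" {
			stale = append(stale, toks[0])
		}
	}
	return
}

func main() {
	running, stale, broken := classify("zzzzz-dz642-aaaaaaaaaaaaaaa\nzzzzz-dz642-bbbbbbbbbbbbbbb stale\nbroken\n")
	fmt.Println(running, stale, broken)
}

A worker that keeps reporting a stale run lock for longer than TimeoutStaleRunLock (5s by default in this change) is then treated as broken and shut down.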
@@ -115,6 +116,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe timeoutShutdown: duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown), timeoutTERM: duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM), timeoutSignal: duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal), + timeoutStaleRunLock: duration(cluster.Containers.CloudVMs.TimeoutStaleRunLock, defaultTimeoutStaleRunLock), installPublicKey: installPublicKey, tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix, stop: make(chan bool), @@ -152,6 +154,7 @@ type Pool struct { timeoutShutdown time.Duration timeoutTERM time.Duration timeoutSignal time.Duration + timeoutStaleRunLock time.Duration installPublicKey ssh.PublicKey tagKeyPrefix string @@ -170,15 +173,17 @@ type Pool struct { runnerMD5 [md5.Size]byte runnerCmd string - mContainersRunning prometheus.Gauge - mInstances *prometheus.GaugeVec - mInstancesPrice *prometheus.GaugeVec - mVCPUs *prometheus.GaugeVec - mMemory *prometheus.GaugeVec - mBootOutcomes *prometheus.CounterVec - mDisappearances *prometheus.CounterVec - mTimeToSSH prometheus.Summary - mTimeToReadyForContainer prometheus.Summary + mContainersRunning prometheus.Gauge + mInstances *prometheus.GaugeVec + mInstancesPrice *prometheus.GaugeVec + mVCPUs *prometheus.GaugeVec + mMemory *prometheus.GaugeVec + mBootOutcomes *prometheus.CounterVec + mDisappearances *prometheus.CounterVec + mTimeToSSH prometheus.Summary + mTimeToReadyForContainer prometheus.Summary + mTimeFromShutdownToGone prometheus.Summary + mTimeFromQueueToCrunchRun prometheus.Summary } type createCall struct { @@ -661,6 +666,22 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) { Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, }) reg.MustRegister(wp.mTimeToReadyForContainer) + wp.mTimeFromShutdownToGone = prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "instances_time_from_shutdown_request_to_disappearance_seconds", + Help: "Number of seconds between the first shutdown attempt and the disappearance of the worker.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }) + reg.MustRegister(wp.mTimeFromShutdownToGone) + wp.mTimeFromQueueToCrunchRun = prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_time_from_queue_to_crunch_run_seconds", + Help: "Number of seconds between the queuing of a container and the start of crunch-run.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }) + reg.MustRegister(wp.mTimeFromQueueToCrunchRun) } func (wp *Pool) runMetrics() { @@ -930,6 +951,10 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) { if wp.mDisappearances != nil { wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc() } + // wkr.destroyed.IsZero() can happen if instance disappeared but we weren't trying to shut it down + if wp.mTimeFromShutdownToGone != nil && !wkr.destroyed.IsZero() { + wp.mTimeFromShutdownToGone.Observe(time.Now().Sub(wkr.destroyed).Seconds()) + } delete(wp.workers, id) go wkr.Close() notify = true diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go index 9199d4bafe..5b145d7c65 100644 --- a/lib/dispatchcloud/worker/worker.go +++ b/lib/dispatchcloud/worker/worker.go @@ -110,6 +110,7 @@ type worker struct { probing chan struct{} bootOutcomeReported 
bool timeToReadyReported bool + staleRunLockSince time.Time } func (wkr *worker) onUnkillable(uuid string) { @@ -176,6 +177,9 @@ func (wkr *worker) startContainer(ctr arvados.Container) { } go func() { rr.Start() + if wkr.wp.mTimeFromQueueToCrunchRun != nil { + wkr.wp.mTimeFromQueueToCrunchRun.Observe(time.Since(ctr.CreatedAt).Seconds()) + } wkr.mtx.Lock() defer wkr.mtx.Unlock() now := time.Now() @@ -382,13 +386,43 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) { return } ok = true + + staleRunLock := false for _, s := range strings.Split(string(stdout), "\n") { - if s == "broken" { + // Each line of the "crunch-run --list" output is one + // of the following: + // + // * a container UUID, indicating that processes + // related to that container are currently running. + // Optionally followed by " stale", indicating that + // the crunch-run process itself has exited (the + // remaining process is probably arv-mount). + // + // * the string "broken", indicating that the instance + // appears incapable of starting containers. + // + // See ListProcesses() in lib/crunchrun/background.go. + if s == "" { + // empty string following final newline + } else if s == "broken" { reportsBroken = true - } else if s != "" { + } else if toks := strings.Split(s, " "); len(toks) == 1 { running = append(running, s) + } else if toks[1] == "stale" { + wkr.logger.WithField("ContainerUUID", toks[0]).Info("probe reported stale run lock") + staleRunLock = true } } + wkr.mtx.Lock() + defer wkr.mtx.Unlock() + if !staleRunLock { + wkr.staleRunLockSince = time.Time{} + } else if wkr.staleRunLockSince.IsZero() { + wkr.staleRunLockSince = time.Now() + } else if dur := time.Now().Sub(wkr.staleRunLockSince); dur > wkr.wp.timeoutStaleRunLock { + wkr.logger.WithField("Duration", dur).Warn("reporting broken after reporting stale run lock for too long") + reportsBroken = true + } return } diff --git a/lib/install/deps.go b/lib/install/deps.go index ba57c20c35..93a0ce452b 100644 --- a/lib/install/deps.go +++ b/lib/install/deps.go @@ -104,7 +104,7 @@ func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, st "ca-certificates", "cadaver", "curl", - "cython", + "cython3", "daemontools", // lib/boot uses setuidgid to drop privileges when running as root "default-jdk-headless", "default-jre-headless", @@ -127,7 +127,6 @@ func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, st "libpam-dev", "libpcre3-dev", "libpq-dev", - "libpython2.7-dev", "libreadline-dev", "libssl-dev", "libwww-perl", @@ -142,13 +141,12 @@ func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, st "pkg-config", "postgresql", "postgresql-contrib", - "python", "python3-dev", - "python-epydoc", "r-base", "r-cran-testthat", "sudo", - "virtualenv", + "python3-virtualenv", + "python3-venv", "wget", "xvfb", "zlib1g-dev", diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index 363d09dafb..394e30a737 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -462,6 +462,7 @@ type CloudVMsConfig struct { TimeoutProbe Duration TimeoutShutdown Duration TimeoutSignal Duration + TimeoutStaleRunLock Duration TimeoutTERM Duration ResourceTags map[string]string TagKeyPrefix string diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py index 5f12b62eeb..93fd6b598a 100755 --- a/sdk/python/arvados/commands/arv_copy.py +++ b/sdk/python/arvados/commands/arv_copy.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: 
Apache-2.0 -# arv-copy [--recursive] [--no-recursive] object-uuid src dst +# arv-copy [--recursive] [--no-recursive] object-uuid # # Copies an object from Arvados instance src to instance dst. # @@ -34,6 +34,7 @@ import sys import logging import tempfile import urllib.parse +import io import arvados import arvados.config @@ -87,17 +88,17 @@ def main(): '-f', '--force', dest='force', action='store_true', help='Perform copy even if the object appears to exist at the remote destination.') copy_opts.add_argument( - '--src', dest='source_arvados', required=True, + '--src', dest='source_arvados', help='The name of the source Arvados instance (required) - points at an Arvados config file. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf.') copy_opts.add_argument( - '--dst', dest='destination_arvados', required=True, + '--dst', dest='destination_arvados', help='The name of the destination Arvados instance (required) - points at an Arvados config file. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf.') copy_opts.add_argument( '--recursive', dest='recursive', action='store_true', - help='Recursively copy any dependencies for this object. (default)') + help='Recursively copy any dependencies for this object, and subprojects. (default)') copy_opts.add_argument( '--no-recursive', dest='recursive', action='store_false', - help='Do not copy any dependencies. NOTE: if this option is given, the copied object will need to be updated manually in order to be functional.') + help='Do not copy any dependencies or subprojects.') copy_opts.add_argument( '--project-uuid', dest='project_uuid', help='The UUID of the project at the destination to which the collection or workflow should be copied.') @@ -118,6 +119,9 @@ def main(): else: logger.setLevel(logging.INFO) + if not args.source_arvados: + args.source_arvados = args.object_uuid[:5] + # Create API clients for the source and destination instances src_arv = api_for_instance(args.source_arvados) dst_arv = api_for_instance(args.destination_arvados) @@ -135,6 +139,9 @@ def main(): elif t == 'Workflow': set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args) result = copy_workflow(args.object_uuid, src_arv, dst_arv, args) + elif t == 'Group': + set_src_owner_uuid(src_arv.groups(), args.object_uuid, args) + result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args) else: abort("cannot copy object {} of type {}".format(args.object_uuid, t)) @@ -170,6 +177,10 @@ def set_src_owner_uuid(resource, uuid, args): # $HOME/.config/arvados/instance_name.conf # def api_for_instance(instance_name): + if not instance_name: + # Use environment + return arvados.api('v1', model=OrderedJsonModel()) + if '/' in instance_name: config_file = instance_name else: @@ -296,7 +307,14 @@ def copy_workflow(wf_uuid, src, dst, args): # copy the workflow itself del wf['uuid'] wf['owner_uuid'] = args.project_uuid - return dst.workflows().create(body=wf).execute(num_retries=args.retries) + + existing = dst.workflows().list(filters=[["owner_uuid", "=", args.project_uuid], + ["name", "=", wf["name"]]]).execute() + if len(existing["items"]) == 0: + return dst.workflows().create(body=wf).execute(num_retries=args.retries) + else: + return dst.workflows().update(uuid=existing["items"][0]["uuid"], body=wf).execute(num_retries=args.retries) + def workflow_collections(obj, locations, docker_images): if isinstance(obj, dict): @@ -305,7 +323,7 @@ def 
workflow_collections(obj, locations, docker_images): if loc.startswith("keep:"): locations.append(loc[5:]) - docker_image = obj.get('dockerImageId', None) or obj.get('dockerPull', None) + docker_image = obj.get('dockerImageId', None) or obj.get('dockerPull', None) or obj.get('acrContainerImage', None) if docker_image is not None: ds = docker_image.split(":", 1) tag = ds[1] if len(ds)==2 else 'latest' @@ -516,7 +534,7 @@ def copy_collection(obj_uuid, src, dst, args): # a new manifest as we go. src_keep = arvados.keep.KeepClient(api_client=src, num_retries=args.retries) dst_keep = arvados.keep.KeepClient(api_client=dst, num_retries=args.retries) - dst_manifest = "" + dst_manifest = io.StringIO() dst_locators = {} bytes_written = 0 bytes_expected = total_collection_size(manifest) @@ -527,14 +545,15 @@ def copy_collection(obj_uuid, src, dst, args): for line in manifest.splitlines(): words = line.split() - dst_manifest += words[0] + dst_manifest.write(words[0]) for word in words[1:]: try: loc = arvados.KeepLocator(word) except ValueError: # If 'word' can't be parsed as a locator, # presume it's a filename. - dst_manifest += ' ' + word + dst_manifest.write(' ') + dst_manifest.write(word) continue blockhash = loc.md5sum # copy this block if we haven't seen it before @@ -547,17 +566,18 @@ def copy_collection(obj_uuid, src, dst, args): dst_locator = dst_keep.put(data) dst_locators[blockhash] = dst_locator bytes_written += loc.size - dst_manifest += ' ' + dst_locators[blockhash] - dst_manifest += "\n" + dst_manifest.write(' ') + dst_manifest.write(dst_locators[blockhash]) + dst_manifest.write("\n") if progress_writer: progress_writer.report(obj_uuid, bytes_written, bytes_expected) progress_writer.finish() # Copy the manifest and save the collection. - logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest) + logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest.getvalue()) - c['manifest_text'] = dst_manifest + c['manifest_text'] = dst_manifest.getvalue() return create_collection_from(c, src, dst, args) def select_git_url(api, repo_name, retries, allow_insecure_http, allow_insecure_http_opt): @@ -632,6 +652,42 @@ def copy_docker_image(docker_image, docker_image_tag, src, dst, args): else: logger.warning('Could not find docker image {}:{}'.format(docker_image, docker_image_tag)) +def copy_project(obj_uuid, src, dst, owner_uuid, args): + + src_project_record = src.groups().get(uuid=obj_uuid).execute(num_retries=args.retries) + + # Create/update the destination project + existing = dst.groups().list(filters=[["owner_uuid", "=", owner_uuid], + ["name", "=", src_project_record["name"]]]).execute(num_retries=args.retries) + if len(existing["items"]) == 0: + project_record = dst.groups().create(body={"group": {"group_class": "project", + "owner_uuid": owner_uuid, + "name": src_project_record["name"]}}).execute(num_retries=args.retries) + else: + project_record = existing["items"][0] + + dst.groups().update(uuid=project_record["uuid"], + body={"group": { + "description": src_project_record["description"]}}).execute(num_retries=args.retries) + + args.project_uuid = project_record["uuid"] + + logger.debug('Copying %s to %s', obj_uuid, project_record["uuid"]) + + # Copy collections + copy_collections([col["uuid"] for col in arvados.util.list_all(src.collections().list, filters=[["owner_uuid", "=", obj_uuid]])], + src, dst, args) + + # Copy workflows + for w in arvados.util.list_all(src.workflows().list, filters=[["owner_uuid", "=", obj_uuid]]): + copy_workflow(w["uuid"], src, dst, 
args) + + if args.recursive: + for g in arvados.util.list_all(src.groups().list, filters=[["owner_uuid", "=", obj_uuid]]): + copy_project(g["uuid"], src, dst, project_record["uuid"], args) + + return project_record + # git_rev_parse(rev, repo) # # Returns the 40-character commit hash corresponding to 'rev' in @@ -654,7 +710,7 @@ def git_rev_parse(rev, repo): # Special case: if handed a Keep locator hash, return 'Collection'. # def uuid_type(api, object_uuid): - if re.match(r'^[a-f0-9]{32}\+[0-9]+(\+[A-Za-z0-9+-]+)?$', object_uuid): + if re.match(arvados.util.keep_locator_pattern, object_uuid): return 'Collection' p = object_uuid.split('-') if len(p) == 3: diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py index f4c1230cc8..0cb4151ac3 100644 --- a/sdk/python/tests/run_test_server.py +++ b/sdk/python/tests/run_test_server.py @@ -43,6 +43,14 @@ import arvados.config ARVADOS_DIR = os.path.realpath(os.path.join(MY_DIRNAME, '../../..')) SERVICES_SRC_DIR = os.path.join(ARVADOS_DIR, 'services') + +# Work around https://bugs.python.org/issue27805, should be no longer +# necessary from sometime in Python 3.8.x +if not os.environ.get('ARVADOS_DEBUG', ''): + WRITE_MODE = 'a' +else: + WRITE_MODE = 'w' + if 'GOPATH' in os.environ: # Add all GOPATH bin dirs to PATH -- but insert them after the # ruby gems bin dir, to ensure "bundle" runs the Ruby bundler @@ -327,7 +335,7 @@ def run(leave_running_atexit=False): env.pop('ARVADOS_API_HOST', None) env.pop('ARVADOS_API_HOST_INSECURE', None) env.pop('ARVADOS_API_TOKEN', None) - logf = open(_logfilename('railsapi'), 'a') + logf = open(_logfilename('railsapi'), WRITE_MODE) railsapi = subprocess.Popen( ['bundle', 'exec', 'passenger', 'start', '-p{}'.format(port), @@ -409,7 +417,7 @@ def run_controller(): if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ: return stop_controller() - logf = open(_logfilename('controller'), 'a') + logf = open(_logfilename('controller'), WRITE_MODE) port = internal_port_from_config("Controller") controller = subprocess.Popen( ["arvados-server", "controller"], @@ -429,7 +437,7 @@ def run_ws(): return stop_ws() port = internal_port_from_config("Websocket") - logf = open(_logfilename('ws'), 'a') + logf = open(_logfilename('ws'), WRITE_MODE) ws = subprocess.Popen( ["arvados-server", "ws"], stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True) @@ -462,7 +470,7 @@ def _start_keep(n, blob_signing=False): yaml.safe_dump(confdata, f) keep_cmd = ["keepstore", "-config", conf] - with open(_logfilename('keep{}'.format(n)), 'a') as logf: + with open(_logfilename('keep{}'.format(n)), WRITE_MODE) as logf: with open('/dev/null') as _stdin: child = subprocess.Popen( keep_cmd, stdin=_stdin, stdout=logf, stderr=logf, close_fds=True) @@ -529,7 +537,7 @@ def run_keep_proxy(): port = internal_port_from_config("Keepproxy") env = os.environ.copy() env['ARVADOS_API_TOKEN'] = auth_token('anonymous') - logf = open(_logfilename('keepproxy'), 'a') + logf = open(_logfilename('keepproxy'), WRITE_MODE) kp = subprocess.Popen( ['keepproxy'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True) @@ -568,7 +576,7 @@ def run_arv_git_httpd(): gitport = internal_port_from_config("GitHTTP") env = os.environ.copy() env.pop('ARVADOS_API_TOKEN', None) - logf = open(_logfilename('arv-git-httpd'), 'a') + logf = open(_logfilename('arv-git-httpd'), WRITE_MODE) agh = subprocess.Popen(['arv-git-httpd'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf) with open(_pidfile('arv-git-httpd'), 'w') as f: @@ 
-587,7 +595,7 @@ def run_keep_web(): keepwebport = internal_port_from_config("WebDAV") env = os.environ.copy() - logf = open(_logfilename('keep-web'), 'a') + logf = open(_logfilename('keep-web'), WRITE_MODE) keepweb = subprocess.Popen( ['keep-web'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf) diff --git a/sdk/python/tests/test_arv_copy.py b/sdk/python/tests/test_arv_copy.py index 324d6e05d7..452c2beba2 100644 --- a/sdk/python/tests/test_arv_copy.py +++ b/sdk/python/tests/test_arv_copy.py @@ -7,11 +7,18 @@ import os import sys import tempfile import unittest +import shutil +import arvados.api +from arvados.collection import Collection, CollectionReader import arvados.commands.arv_copy as arv_copy from . import arvados_testutil as tutil +from . import run_test_server + +class ArvCopyVersionTestCase(run_test_server.TestCaseWithServers, tutil.VersionChecker): + MAIN_SERVER = {} + KEEP_SERVER = {} -class ArvCopyTestCase(unittest.TestCase, tutil.VersionChecker): def run_copy(self, args): sys.argv = ['arv-copy'] + args return arv_copy.main() @@ -26,3 +33,50 @@ class ArvCopyTestCase(unittest.TestCase, tutil.VersionChecker): with self.assertRaises(SystemExit): self.run_copy(['--version']) self.assertVersionOutput(out, err) + + def test_copy_project(self): + api = arvados.api() + src_proj = api.groups().create(body={"group": {"name": "arv-copy project", "group_class": "project"}}).execute()["uuid"] + + c = Collection() + with c.open('foo', 'wt') as f: + f.write('foo') + c.save_new("arv-copy foo collection", owner_uuid=src_proj) + + dest_proj = api.groups().create(body={"group": {"name": "arv-copy dest project", "group_class": "project"}}).execute()["uuid"] + + tmphome = tempfile.mkdtemp() + home_was = os.environ['HOME'] + os.environ['HOME'] = tmphome + try: + cfgdir = os.path.join(tmphome, ".config", "arvados") + os.makedirs(cfgdir) + with open(os.path.join(cfgdir, "zzzzz.conf"), "wt") as f: + f.write("ARVADOS_API_HOST=%s\n" % os.environ["ARVADOS_API_HOST"]) + f.write("ARVADOS_API_TOKEN=%s\n" % os.environ["ARVADOS_API_TOKEN"]) + f.write("ARVADOS_API_HOST_INSECURE=1\n") + + contents = api.groups().list(filters=[["owner_uuid", "=", dest_proj]]).execute() + assert len(contents["items"]) == 0 + + try: + self.run_copy(["--project-uuid", dest_proj, src_proj]) + except SystemExit as e: + assert e.code == 0 + + contents = api.groups().list(filters=[["owner_uuid", "=", dest_proj]]).execute() + assert len(contents["items"]) == 1 + + assert contents["items"][0]["name"] == "arv-copy project" + copied_project = contents["items"][0]["uuid"] + + contents = api.collections().list(filters=[["owner_uuid", "=", copied_project]]).execute() + assert len(contents["items"]) == 1 + + assert contents["items"][0]["uuid"] != c.manifest_locator() + assert contents["items"][0]["name"] == "arv-copy foo collection" + assert contents["items"][0]["portable_data_hash"] == c.portable_data_hash() + + finally: + os.environ['HOME'] = home_was + shutil.rmtree(tmphome) diff --git a/services/api/app/controllers/arvados/v1/users_controller.rb b/services/api/app/controllers/arvados/v1/users_controller.rb index 867b9a6e6a..cd23706d08 100644 --- a/services/api/app/controllers/arvados/v1/users_controller.rb +++ b/services/api/app/controllers/arvados/v1/users_controller.rb @@ -22,7 +22,15 @@ class Arvados::V1::UsersController < ApplicationController rescue ActiveRecord::RecordNotUnique retry end - u.update_attributes!(nullify_attrs(attrs)) + needupdate = {} + nullify_attrs(attrs).each do |k,v| + if !v.nil? 
&& u.send(k) != v + needupdate[k] = v + end + end + if needupdate.length > 0 + u.update_attributes!(needupdate) + end @objects << u end @offset = 0 diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb index 6fb8ff2b33..709b4b4d1c 100644 --- a/services/api/app/models/arvados_model.rb +++ b/services/api/app/models/arvados_model.rb @@ -454,7 +454,7 @@ class ArvadosModel < ApplicationRecord end def logged_attributes - attributes.except(*Rails.configuration.AuditLogs.UnloggedAttributes.keys) + attributes.except(*Rails.configuration.AuditLogs.UnloggedAttributes.stringify_keys.keys) end def self.full_text_searchable_columns diff --git a/services/api/lib/config_loader.rb b/services/api/lib/config_loader.rb index cf16993ca5..f421fb5b2a 100644 --- a/services/api/lib/config_loader.rb +++ b/services/api/lib/config_loader.rb @@ -147,14 +147,14 @@ class ConfigLoader 'Ki' => 1 << 10, 'M' => 1000000, 'Mi' => 1 << 20, - "G" => 1000000000, - "Gi" => 1 << 30, - "T" => 1000000000000, - "Ti" => 1 << 40, - "P" => 1000000000000000, - "Pi" => 1 << 50, - "E" => 1000000000000000000, - "Ei" => 1 << 60, + "G" => 1000000000, + "Gi" => 1 << 30, + "T" => 1000000000000, + "Ti" => 1 << 40, + "P" => 1000000000000000, + "Pi" => 1 << 50, + "E" => 1000000000000000000, + "Ei" => 1 << 60, }[mt[2]] end end diff --git a/services/api/lib/enable_jobs_api.rb b/services/api/lib/enable_jobs_api.rb index 1a96a81ad6..cef76f08a5 100644 --- a/services/api/lib/enable_jobs_api.rb +++ b/services/api/lib/enable_jobs_api.rb @@ -2,16 +2,19 @@ # # SPDX-License-Identifier: AGPL-3.0 -Disable_update_jobs_api_method_list = {"jobs.create"=>{}, - "pipeline_instances.create"=>{}, - "pipeline_templates.create"=>{}, - "jobs.update"=>{}, - "pipeline_instances.update"=>{}, - "pipeline_templates.update"=>{}, - "job_tasks.create"=>{}, - "job_tasks.update"=>{}} +Disable_update_jobs_api_method_list = ConfigLoader.to_OrderedOptions({ + "jobs.create"=>{}, + "pipeline_instances.create"=>{}, + "pipeline_templates.create"=>{}, + "jobs.update"=>{}, + "pipeline_instances.update"=>{}, + "pipeline_templates.update"=>{}, + "job_tasks.create"=>{}, + "job_tasks.update"=>{} + }) -Disable_jobs_api_method_list = {"jobs.create"=>{}, +Disable_jobs_api_method_list = ConfigLoader.to_OrderedOptions({ + "jobs.create"=>{}, "pipeline_instances.create"=>{}, "pipeline_templates.create"=>{}, "jobs.get"=>{}, @@ -36,7 +39,7 @@ Disable_jobs_api_method_list = {"jobs.create"=>{}, "jobs.show"=>{}, "pipeline_instances.show"=>{}, "pipeline_templates.show"=>{}, - "job_tasks.show"=>{}} + "job_tasks.show"=>{}}) def check_enable_legacy_jobs_api # Create/update is permanently disabled (legacy functionality has been removed) diff --git a/services/api/test/functional/arvados/v1/groups_controller_test.rb b/services/api/test/functional/arvados/v1/groups_controller_test.rb index ff89cd2129..f413188b54 100644 --- a/services/api/test/functional/arvados/v1/groups_controller_test.rb +++ b/services/api/test/functional/arvados/v1/groups_controller_test.rb @@ -430,7 +430,8 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase end test 'get contents with jobs and pipeline instances disabled' do - Rails.configuration.API.DisabledAPIs = {'jobs.index'=>{}, 'pipeline_instances.index'=>{}} + Rails.configuration.API.DisabledAPIs = ConfigLoader.to_OrderedOptions( + {'jobs.index'=>{}, 'pipeline_instances.index'=>{}}) authorize_with :active get :contents, params: { diff --git a/services/api/test/functional/arvados/v1/schema_controller_test.rb 
b/services/api/test/functional/arvados/v1/schema_controller_test.rb index 3dd343b13c..764f3a8d1d 100644 --- a/services/api/test/functional/arvados/v1/schema_controller_test.rb +++ b/services/api/test/functional/arvados/v1/schema_controller_test.rb @@ -65,8 +65,8 @@ class Arvados::V1::SchemaControllerTest < ActionController::TestCase end test "non-empty disable_api_methods" do - Rails.configuration.API.DisabledAPIs = - {'jobs.create'=>{}, 'pipeline_instances.create'=>{}, 'pipeline_templates.create'=>{}} + Rails.configuration.API.DisabledAPIs = ConfigLoader.to_OrderedOptions( + {'jobs.create'=>{}, 'pipeline_instances.create'=>{}, 'pipeline_templates.create'=>{}}) get :index assert_response :success discovery_doc = JSON.parse(@response.body) diff --git a/services/api/test/functional/arvados/v1/users_controller_test.rb b/services/api/test/functional/arvados/v1/users_controller_test.rb index 0ce9f1137f..ea5d5b1436 100644 --- a/services/api/test/functional/arvados/v1/users_controller_test.rb +++ b/services/api/test/functional/arvados/v1/users_controller_test.rb @@ -1039,9 +1039,12 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase test "batch update" do existinguuid = 'remot-tpzed-foobarbazwazqux' newuuid = 'remot-tpzed-newnarnazwazqux' + unchanginguuid = 'remot-tpzed-nochangingattrs' act_as_system_user do User.create!(uuid: existinguuid, email: 'root@existing.example.com') + User.create!(uuid: unchanginguuid, email: 'root@unchanging.example.com', prefs: {'foo' => {'bar' => 'baz'}}) end + assert_equal(1, Log.where(object_uuid: unchanginguuid).count) authorize_with(:admin) patch(:batch_update, @@ -1059,6 +1062,10 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase 'email' => 'root@remot.example.com', 'username' => '', }, + unchanginguuid => { + 'email' => 'root@unchanging.example.com', + 'prefs' => {'foo' => {'bar' => 'baz'}}, + }, }}) assert_response(:success) @@ -1070,6 +1077,8 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase assert_equal('noot', User.find_by_uuid(newuuid).first_name) assert_equal('root@remot.example.com', User.find_by_uuid(newuuid).email) + + assert_equal(1, Log.where(object_uuid: unchanginguuid).count) end NON_ADMIN_USER_DATA = ["uuid", "kind", "is_active", "email", "first_name", diff --git a/services/api/test/functional/user_sessions_controller_test.rb b/services/api/test/functional/user_sessions_controller_test.rb index cd475dea4d..d979208d38 100644 --- a/services/api/test/functional/user_sessions_controller_test.rb +++ b/services/api/test/functional/user_sessions_controller_test.rb @@ -68,7 +68,7 @@ class UserSessionsControllerTest < ActionController::TestCase test "login to LoginCluster" do Rails.configuration.Login.LoginCluster = 'zbbbb' - Rails.configuration.RemoteClusters['zbbbb'] = {'Host' => 'zbbbb.example.com'} + Rails.configuration.RemoteClusters['zbbbb'] = ConfigLoader.to_OrderedOptions({'Host' => 'zbbbb.example.com'}) api_client_page = 'http://client.example.com/home' get :login, params: {return_to: api_client_page} assert_response :redirect diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb index c99a57aaff..5dc77cb98a 100644 --- a/services/api/test/test_helper.rb +++ b/services/api/test/test_helper.rb @@ -62,7 +62,7 @@ class ActiveSupport::TestCase include ArvadosTestSupport include CurrentApiClient - teardown do + setup do Thread.current[:api_client_ip_address] = nil Thread.current[:api_client_authorization] = nil Thread.current[:api_client_uuid] = nil @@ -72,6 +72,14 @@ 
class ActiveSupport::TestCase restore_configuration end + teardown do + # Confirm that any changed configuration doesn't include non-symbol keys + $arvados_config.keys.each do |conf_name| + conf = Rails.configuration.send(conf_name) + confirm_keys_as_symbols(conf, conf_name) if conf.respond_to?('keys') + end + end + def assert_equal(expect, *args) if expect.nil? assert_nil(*args) @@ -99,6 +107,14 @@ class ActiveSupport::TestCase end end + def confirm_keys_as_symbols(conf, conf_name) + assert(conf.is_a?(ActiveSupport::OrderedOptions), "#{conf_name} should be an OrderedOptions object") + conf.keys.each do |k| + assert(k.is_a?(Symbol), "Key '#{k}' on section '#{conf_name}' should be a Symbol") + confirm_keys_as_symbols(conf[k], "#{conf_name}.#{k}") if conf[k].respond_to?('keys') + end + end + def restore_configuration # Restore configuration settings changed during tests ConfigLoader.copy_into_config $arvados_config, Rails.configuration diff --git a/services/api/test/unit/application_test.rb b/services/api/test/unit/application_test.rb index 679dddf223..e1565ec627 100644 --- a/services/api/test/unit/application_test.rb +++ b/services/api/test/unit/application_test.rb @@ -7,7 +7,7 @@ require 'test_helper' class ApplicationTest < ActiveSupport::TestCase include CurrentApiClient - test "test act_as_system_user" do + test "act_as_system_user" do Thread.current[:user] = users(:active) assert_equal users(:active), Thread.current[:user] act_as_system_user do @@ -17,7 +17,7 @@ class ApplicationTest < ActiveSupport::TestCase assert_equal users(:active), Thread.current[:user] end - test "test act_as_system_user is exception safe" do + test "act_as_system_user is exception safe" do Thread.current[:user] = users(:active) assert_equal users(:active), Thread.current[:user] caught = false @@ -33,4 +33,12 @@ class ApplicationTest < ActiveSupport::TestCase assert caught assert_equal users(:active), Thread.current[:user] end + + test "config maps' keys are returned as symbols" do + assert Rails.configuration.Users.AutoSetupUsernameBlacklist.is_a? ActiveSupport::OrderedOptions + assert Rails.configuration.Users.AutoSetupUsernameBlacklist.keys.size > 0 + Rails.configuration.Users.AutoSetupUsernameBlacklist.keys.each do |k| + assert k.is_a? Symbol + end + end end diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb index addea83062..48cae5afee 100644 --- a/services/api/test/unit/collection_test.rb +++ b/services/api/test/unit/collection_test.rb @@ -1044,10 +1044,10 @@ class CollectionTest < ActiveSupport::TestCase end test "create collections with managed properties" do - Rails.configuration.Collections.ManagedProperties = { + Rails.configuration.Collections.ManagedProperties = ConfigLoader.to_OrderedOptions({ 'default_prop1' => {'Value' => 'prop1_value'}, 'responsible_person_uuid' => {'Function' => 'original_owner'} - } + }) # Test collection without initial properties act_as_user users(:active) do c = create_collection 'foo', Encoding::US_ASCII @@ -1076,9 +1076,9 @@ class CollectionTest < ActiveSupport::TestCase end test "update collection with protected managed properties" do - Rails.configuration.Collections.ManagedProperties = { + Rails.configuration.Collections.ManagedProperties = ConfigLoader.to_OrderedOptions({ 'default_prop1' => {'Value' => 'prop1_value', 'Protected' => true}, - } + }) act_as_user users(:active) do c = create_collection 'foo', Encoding::US_ASCII assert c.valid? 
diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb index b91910d2d6..90de800b2f 100644 --- a/services/api/test/unit/container_request_test.rb +++ b/services/api/test/unit/container_request_test.rb @@ -576,7 +576,7 @@ class ContainerRequestTest < ActiveSupport::TestCase test "Container.resolve_container_image(pdh)" do set_user_from_auth :active [[:docker_image, 'v1'], [:docker_image_1_12, 'v2']].each do |coll, ver| - Rails.configuration.Containers.SupportedDockerImageFormats = {ver=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({ver=>{}}) pdh = collections(coll).portable_data_hash resolved = Container.resolve_container_image(pdh) assert_equal resolved, pdh @@ -602,7 +602,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "migrated docker image" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v2'=>{}}) add_docker19_migration_link # Test that it returns only v2 images even though request is for v1 image. @@ -620,7 +620,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "use unmigrated docker image" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v1'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v1'=>{}}) add_docker19_migration_link # Test that it returns only supported v1 images even though there is a @@ -639,7 +639,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "incompatible docker image v1" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v1'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v1'=>{}}) add_docker19_migration_link # Don't return unsupported v2 image even if we ask for it directly. @@ -652,7 +652,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "incompatible docker image v2" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v2'=>{}}) # No migration link, don't return unsupported v1 image, set_user_from_auth :active diff --git a/services/api/test/unit/job_test.rb b/services/api/test/unit/job_test.rb index 0e8cc48538..c529aab8b6 100644 --- a/services/api/test/unit/job_test.rb +++ b/services/api/test/unit/job_test.rb @@ -117,7 +117,7 @@ class JobTest < ActiveSupport::TestCase 'locator' => BAD_COLLECTION, }.each_pair do |spec_type, image_spec| test "Job validation fails with nonexistent Docker image #{spec_type}" do - Rails.configuration.RemoteClusters = {} + Rails.configuration.RemoteClusters = ConfigLoader.to_OrderedOptions({}) job = Job.new job_attrs(runtime_constraints: {'docker_image' => image_spec}) assert(job.invalid?, "nonexistent Docker image #{spec_type} #{image_spec} was valid") diff --git a/services/api/test/unit/log_test.rb b/services/api/test/unit/log_test.rb index 016a0e4eb4..58f0cddb70 100644 --- a/services/api/test/unit/log_test.rb +++ b/services/api/test/unit/log_test.rb @@ -282,7 +282,7 @@ class LogTest < ActiveSupport::TestCase end test "non-empty configuration.unlogged_attributes" do - Rails.configuration.AuditLogs.UnloggedAttributes = {"manifest_text"=>{}} + Rails.configuration.AuditLogs.UnloggedAttributes = ConfigLoader.to_OrderedOptions({"manifest_text"=>{}}) txt = ". 
acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n" act_as_system_user do @@ -297,7 +297,7 @@ class LogTest < ActiveSupport::TestCase end test "empty configuration.unlogged_attributes" do - Rails.configuration.AuditLogs.UnloggedAttributes = {} + Rails.configuration.AuditLogs.UnloggedAttributes = ConfigLoader.to_OrderedOptions({}) txt = ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n" act_as_system_user do diff --git a/services/api/test/unit/user_test.rb b/services/api/test/unit/user_test.rb index 7fcd36d709..b6d66230db 100644 --- a/services/api/test/unit/user_test.rb +++ b/services/api/test/unit/user_test.rb @@ -110,7 +110,7 @@ class UserTest < ActiveSupport::TestCase end test "new username set avoiding blacklist" do - Rails.configuration.Users.AutoSetupUsernameBlacklist = {"root"=>{}} + Rails.configuration.Users.AutoSetupUsernameBlacklist = ConfigLoader.to_OrderedOptions({"root"=>{}}) check_new_username_setting("root", "root2") end @@ -340,48 +340,52 @@ class UserTest < ActiveSupport::TestCase assert_equal(user.first_name, 'first_name_for_newly_created_user_updated') end + active_notify_list = ConfigLoader.to_OrderedOptions({"active-notify@example.com"=>{}}) + inactive_notify_list = ConfigLoader.to_OrderedOptions({"inactive-notify@example.com"=>{}}) + empty_notify_list = ConfigLoader.to_OrderedOptions({}) + test "create new user with notifications" do set_user_from_auth :admin - create_user_and_verify_setup_and_notifications true, {'active-notify-address@example.com'=>{}}, {'inactive-notify-address@example.com'=>{}}, nil, nil - create_user_and_verify_setup_and_notifications true, {'active-notify-address@example.com'=>{}}, {}, nil, nil - create_user_and_verify_setup_and_notifications true, {}, [], nil, nil - create_user_and_verify_setup_and_notifications false, {'active-notify-address@example.com'=>{}}, {'inactive-notify-address@example.com'=>{}}, nil, nil - create_user_and_verify_setup_and_notifications false, {}, {'inactive-notify-address@example.com'=>{}}, nil, nil - create_user_and_verify_setup_and_notifications false, {}, {}, nil, nil + create_user_and_verify_setup_and_notifications true, active_notify_list, inactive_notify_list, nil, nil + create_user_and_verify_setup_and_notifications true, active_notify_list, empty_notify_list, nil, nil + create_user_and_verify_setup_and_notifications true, empty_notify_list, empty_notify_list, nil, nil + create_user_and_verify_setup_and_notifications false, active_notify_list, inactive_notify_list, nil, nil + create_user_and_verify_setup_and_notifications false, empty_notify_list, inactive_notify_list, nil, nil + create_user_and_verify_setup_and_notifications false, empty_notify_list, empty_notify_list, nil, nil end [ # Easy inactive user tests. - [false, {}, {}, "inactive-none@example.com", false, false, "inactivenone"], - [false, {}, {}, "inactive-vm@example.com", true, false, "inactivevm"], - [false, {}, {}, "inactive-repo@example.com", false, true, "inactiverepo"], - [false, {}, {}, "inactive-both@example.com", true, true, "inactiveboth"], + [false, empty_notify_list, empty_notify_list, "inactive-none@example.com", false, false, "inactivenone"], + [false, empty_notify_list, empty_notify_list, "inactive-vm@example.com", true, false, "inactivevm"], + [false, empty_notify_list, empty_notify_list, "inactive-repo@example.com", false, true, "inactiverepo"], + [false, empty_notify_list, empty_notify_list, "inactive-both@example.com", true, true, "inactiveboth"], # Easy active user tests. 
- [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-none@example.com", false, false, "activenone"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-vm@example.com", true, false, "activevm"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-repo@example.com", false, true, "activerepo"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-both@example.com", true, true, "activeboth"], + [true, active_notify_list, inactive_notify_list, "active-none@example.com", false, false, "activenone"], + [true, active_notify_list, inactive_notify_list, "active-vm@example.com", true, false, "activevm"], + [true, active_notify_list, inactive_notify_list, "active-repo@example.com", false, true, "activerepo"], + [true, active_notify_list, inactive_notify_list, "active-both@example.com", true, true, "activeboth"], # Test users with malformed e-mail addresses. - [false, {}, {}, nil, true, true, nil], - [false, {}, {}, "arvados", true, true, nil], - [false, {}, {}, "@example.com", true, true, nil], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "*!*@example.com", true, false, nil], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "*!*@example.com", false, false, nil], + [false, empty_notify_list, empty_notify_list, nil, true, true, nil], + [false, empty_notify_list, empty_notify_list, "arvados", true, true, nil], + [false, empty_notify_list, empty_notify_list, "@example.com", true, true, nil], + [true, active_notify_list, inactive_notify_list, "*!*@example.com", true, false, nil], + [true, active_notify_list, inactive_notify_list, "*!*@example.com", false, false, nil], # Test users with various username transformations. 
- [false, {}, {}, "arvados@example.com", false, false, "arvados2"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "arvados@example.com", false, false, "arvados2"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "root@example.com", true, false, "root2"], - [false, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "root@example.com", true, false, "root2"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "roo_t@example.com", false, true, "root2"], - [false, {}, {}, "^^incorrect_format@example.com", true, true, "incorrectformat"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", true, true, "ad9"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", false, false, "ad9"], - [false, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", true, true, "ad9"], - [false, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", false, false, "ad9"], + [false, empty_notify_list, empty_notify_list, "arvados@example.com", false, false, "arvados2"], + [true, active_notify_list, inactive_notify_list, "arvados@example.com", false, false, "arvados2"], + [true, active_notify_list, inactive_notify_list, "root@example.com", true, false, "root2"], + [false, active_notify_list, inactive_notify_list, "root@example.com", true, false, "root2"], + [true, active_notify_list, inactive_notify_list, "roo_t@example.com", false, true, "root2"], + [false, empty_notify_list, empty_notify_list, "^^incorrect_format@example.com", true, true, "incorrectformat"], + [true, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", true, true, "ad9"], + [true, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", false, false, "ad9"], + [false, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", true, true, "ad9"], + [false, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", false, false, "ad9"], ].each do |active, new_user_recipients, inactive_recipients, email, auto_setup_vm, auto_setup_repo, expect_username| test "create new user with auto setup #{active} #{email} #{auto_setup_vm} #{auto_setup_repo}" do set_user_from_auth :admin @@ -569,7 +573,6 @@ class UserTest < ActiveSupport::TestCase assert_not_nil resp_user, 'expected user object' assert_not_nil resp_user['uuid'], 'expected user object' assert_equal email, resp_user['email'], 'expected email not found' - end def verify_link (link_object, link_class, link_name, tail_uuid, head_uuid) @@ -648,7 +651,7 @@ class UserTest < ActiveSupport::TestCase if not new_user_recipients.empty? then assert_not_nil new_user_email, 'Expected new user email after setup' assert_equal Rails.configuration.Users.UserNotifierEmailFrom, new_user_email.from[0] - assert_equal new_user_recipients.keys.first, new_user_email.to[0] + assert_equal new_user_recipients.stringify_keys.keys.first, new_user_email.to[0] assert_equal new_user_email_subject, new_user_email.subject else assert_nil new_user_email, 'Did not expect new user email after setup' @@ -658,7 +661,7 @@ class UserTest < ActiveSupport::TestCase if not inactive_recipients.empty? 
then assert_not_nil new_inactive_user_email, 'Expected new inactive user email after setup' assert_equal Rails.configuration.Users.UserNotifierEmailFrom, new_inactive_user_email.from[0] - assert_equal inactive_recipients.keys.first, new_inactive_user_email.to[0] + assert_equal inactive_recipients.stringify_keys.keys.first, new_inactive_user_email.to[0] assert_equal "#{Rails.configuration.Users.EmailSubjectPrefix}New inactive user notification", new_inactive_user_email.subject else assert_nil new_inactive_user_email, 'Did not expect new inactive user email after setup' @@ -667,7 +670,6 @@ class UserTest < ActiveSupport::TestCase assert_nil new_inactive_user_email, 'Expected no inactive user email after setting up active user' end ActionMailer::Base.deliveries = [] - end def verify_link_exists link_exists, head_uuid, tail_uuid, link_class, link_name, property_name=nil, property_value=nil diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.base b/tools/arvbox/lib/arvbox/docker/Dockerfile.base index bde5ffe898..815db22b4c 100644 --- a/tools/arvbox/lib/arvbox/docker/Dockerfile.base +++ b/tools/arvbox/lib/arvbox/docker/Dockerfile.base @@ -10,18 +10,20 @@ RUN apt-get update && \ apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \ postgresql-9.6 postgresql-contrib-9.6 git build-essential runit curl libpq-dev \ libcurl4-openssl-dev libssl1.0-dev zlib1g-dev libpcre3-dev libpam-dev \ - openssh-server python-setuptools netcat-traditional \ - python-epydoc graphviz bzip2 less sudo virtualenv \ - libpython-dev fuse libfuse-dev python-pip python-yaml \ - pkg-config libattr1-dev python-pycurl \ + openssh-server netcat-traditional \ + graphviz bzip2 less sudo virtualenv \ + libpython-dev fuse libfuse-dev \ + pkg-config libattr1-dev \ libwww-perl libio-socket-ssl-perl libcrypt-ssleay-perl \ libjson-perl nginx gitolite3 lsof libreadline-dev \ - apt-transport-https ca-certificates \ - linkchecker python3-virtualenv python-virtualenv xvfb iceweasel \ + apt-transport-https ca-certificates python3-yaml \ + linkchecker python3-virtualenv python3-venv xvfb iceweasel \ libgnutls28-dev python3-dev vim cadaver cython gnupg dirmngr \ libsecret-1-dev r-base r-cran-testthat libxml2-dev pandoc \ python3-setuptools python3-pip openjdk-8-jdk bsdmainutils net-tools \ - ruby2.3 ruby-dev bundler shellinabox && \ + ruby2.3 ruby-dev bundler shellinabox && \ + apt-get remove -yq libpython-dev libpython-stdlib libpython2.7 libpython2.7-dev \ + libpython2.7-minimal libpython2.7-stdlib python2.7-minimal python2.7 && \ apt-get clean ENV RUBYVERSION_MINOR 2.3 @@ -78,8 +80,6 @@ ENV GDVERSION=v0.23.0 ENV GDURL=https://github.com/mozilla/geckodriver/releases/download/$GDVERSION/geckodriver-$GDVERSION-linux64.tar.gz RUN set -e && curl -L -f ${GDURL} | tar -C /usr/local/bin -xzf - geckodriver -RUN pip install -U setuptools - ENV NODEVERSION v8.15.1 # Install nodejs binary diff --git a/tools/arvbox/lib/arvbox/docker/service/sdk/run-service b/tools/arvbox/lib/arvbox/docker/service/sdk/run-service index 8a36140bcf..1ec225ca12 100755 --- a/tools/arvbox/lib/arvbox/docker/service/sdk/run-service +++ b/tools/arvbox/lib/arvbox/docker/service/sdk/run-service @@ -37,13 +37,13 @@ fi pip_install wheel cd /usr/src/arvados/sdk/python -python setup.py sdist +$PYCMD setup.py sdist pip_install $(ls dist/arvados-python-client-*.tar.gz | tail -n1) cd /usr/src/arvados/services/fuse -python setup.py sdist +$PYCMD setup.py sdist pip_install $(ls dist/arvados_fuse-*.tar.gz | tail -n1) cd /usr/src/arvados/sdk/cwl -python setup.py sdist +$PYCMD 
setup.py sdist pip_install $(ls dist/arvados-cwl-runner-*.tar.gz | tail -n1) diff --git a/tools/arvbox/lib/arvbox/docker/yml_override.py b/tools/arvbox/lib/arvbox/docker/yml_override.py index 446448f5eb..7f35ac1d68 100755 --- a/tools/arvbox/lib/arvbox/docker/yml_override.py +++ b/tools/arvbox/lib/arvbox/docker/yml_override.py @@ -20,7 +20,7 @@ with open(fn) as f: def recursiveMerge(a, b): if isinstance(a, dict) and isinstance(b, dict): for k in b: - print k + print(k) a[k] = recursiveMerge(a.get(k), b[k]) return a else: diff --git a/tools/copy-tutorial/copy-tutorial.sh b/tools/copy-tutorial/copy-tutorial.sh index bdc75da2e1..e7fac7af48 100755 --- a/tools/copy-tutorial/copy-tutorial.sh +++ b/tools/copy-tutorial/copy-tutorial.sh @@ -1,25 +1,83 @@ -#!/bin/sh +#!/bin/bash # Copyright (C) The Arvados Authors. All rights reserved. # # SPDX-License-Identifier: AGPL-3.0 -set -e +set -e -o pipefail -if test -z "$1" ; then +if test -z "$1" ; then echo "$0: Copies Arvados tutorial resources from public data cluster (jutro)" - echo "Usage: copy-tutorial.sh <dest>" - echo " <dest> is destination cluster configuration that can be found in ~/.config/arvados" + echo "Usage: copy-tutorial.sh <tutorial>" + echo " <tutorial> is which tutorial to copy, one of:" + echo " bwa-mem Tutorial from https://doc.arvados.org/user/tutorials/tutorial-workflow-workbench.html" + echo " whole-genome Whole genome variant calling tutorial workflow (large)" exit fi -echo "Copying from public data cluster (jutro) to $1" +if test -z "$ARVADOS_API_HOST" ; then + echo "Please set ARVADOS_API_HOST to the destination cluster" + exit +fi + +src=jutro +tutorial=$1 + +if ! test -f $HOME/.config/arvados/jutro.conf ; then + # Set it up with the anonymous user token. + echo "ARVADOS_API_HOST=jutro.arvadosapi.com" > $HOME/.config/arvados/jutro.conf + echo "ARVADOS_API_TOKEN=v2/jutro-gj3su-e2o9x84aeg7q005/22idg1m3zna4qe4id3n0b9aw86t72jdw8qu1zj45aboh1mm4ej" >> $HOME/.config/arvados/jutro.conf + exit 1 +fi + +echo +echo "Copying from public data cluster (jutro) to $ARVADOS_API_HOST" +echo + +make_project() { + name="$1" + owner="$2" + if test -z "$owner" ; then + owner=$(arv --format=uuid user current) + fi + project_uuid=$(arv --format=uuid group list --filters '[["name", "=", "'"$name"'"], ["owner_uuid", "=", "'$owner'"]]') + if test -z "$project_uuid" ; then + project_uuid=$(arv --format=uuid group create --group '{"name":"'"$name"'", "group_class": "project", "owner_uuid": "'$owner'"}') + + fi + echo $project_uuid +} -for a in $(cat $HOME/.config/arvados/$1.conf) ; do export $a ; done +copy_jobs_image() { + if ! 
arv-keepdocker | grep "arvados/jobs *latest" ; then + arv-copy --project-uuid=$parent_project jutro-4zz18-sxmit0qs6i9n2s4 + fi +} -project_uuid=$(arv --format=uuid group create --group '{"name":"User guide resources", "group_class": "project"}') +parent_project=$(make_project "Tutorial projects") +copy_jobs_image -# Bwa-mem workflow -arv-copy --src jutro --dst $1 --project-uuid=$project_uuid f141fc27e7cfa7f7b6d208df5e0ee01b+59 -arv-copy --src jutro --dst $1 --project-uuid=$project_uuid jutro-7fd4e-mkmmq53m1ze6apx +if test "$tutorial" = "bwa-mem" ; then + echo + echo "Copying bwa mem tutorial" + echo -echo "Data copied to \"User guide resources\" at $project_uuid" + arv-copy --project-uuid=$parent_project jutro-j7d0g-rehmt1w5v2p2drp + + echo + echo "Finished, data copied to \"User guide resources\" at $parent_project" + echo "You can now go to Workbench and choose 'Run a process' and then select 'bwa-mem.cwl'" + echo +fi + +if test "$tutorial" = "whole-genome" ; then + echo + echo "Copying whole genome variant calling tutorial" + echo + + arv-copy --project-uuid=$parent_project jutro-j7d0g-n2g87m02rsl4cx2 + + echo + echo "Finished, data copied to \"WGS Processing Tutorial\" at $parent_project" + echo "You can now go to Workbench and choose 'Run a process' and then select 'WGS Processing Tutorial'" + echo +fi
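For reference, a minimal usage sketch for the new copy-tutorial.sh. The cluster host and token below are placeholder values, and it assumes the Arvados CLI tools the script calls (arv, arv-copy, arv-keepdocker) are already installed:

    # Point the client tools at the destination cluster (placeholders, not real credentials).
    export ARVADOS_API_HOST=zzzzz.example.com
    export ARVADOS_API_TOKEN=v2/zzzzz-gj3su-0123456789abcde/placeholdersecrettoken

    # Copy the small bwa-mem tutorial; pass "whole-genome" instead for the large workflow.
    ./copy-tutorial.sh bwa-mem

Note that on a machine with no ~/.config/arvados/jutro.conf, the script writes that file with the public anonymous token for jutro and exits, so it may need to be run a second time to perform the copy.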