get_combined(:output_path)
end
+ def log_object_uuids  # uuids whose log rows are shown in the live log (used by live_log_lines)
+ [get_combined(:uuid), get(:uuid)].uniq  # uuid from get_combined and from get; uniq drops the duplicate when both are equal
+ end
+
def live_log_lines(limit=2000)
event_types = ["stdout", "stderr", "arv-mount", "crunch-run"]
- log_lines = Log.where(event_type: event_types, object_uuid: uuid).order("id DESC").limit(limit)
+ log_lines = Log.where(event_type: event_types, object_uuid: log_object_uuids).order("id DESC").limit(limit)  # match every uuid from log_object_uuids; "id DESC" plus limit selects the highest ids, reversed again below
log_lines.results.reverse.
flat_map { |log| log.properties[:text].split("\n") rescue [] }
end
resp
end
+ def log_object_uuids  # uuids to query for live log lines
+ [uuid]  # a one-element array holding this object's uuid
+ end
+
protected
def get key, obj=@proxied
# container_uuid of a container_request
end
+ def log_object_uuids
+ # object uuids whose log entries make up the live log; the body is empty here, so this definition returns nil (presumably overridden elsewhere -- confirm)
+ end
+
def live_log_lines(limit)
# fetch log entries from logs table for @proxied
end
<h4>Recent logs</h4>
<div id="event_log_div"
class="arv-log-event-listener arv-log-event-handler-append-logs arv-job-log-window"
- data-object-uuid="<%= wu.uuid %>"
+ data-object-uuids="<%= wu.log_object_uuids.join(' ') %>"
><%= wu.live_log_lines(Rails.configuration.running_job_log_records_to_fetch).join("\n") %>
</div>
["pipeline_instances", api_fixture("pipeline_instances")['pipeline_with_newer_template']['uuid']],
["jobs", api_fixture("jobs")['running']['uuid']],
["containers", api_fixture("containers")['running']['uuid']],
- ["container_requests", api_fixture("container_requests")['running']['uuid']],
+ ["container_requests", api_fixture("container_requests")['running']['uuid'], api_fixture("containers")['running']['uuid']],
].each do |c|
test "test live logging scrolling #{c[0]}" do
controller = c[0]
uuid = c[1]
+ log_uuid = c[2] || c[1]
visit(page_with_token("admin", "/#{controller}/#{uuid}"))
click_link("Log")
Thread.current[:arvados_api_token] = @@API_AUTHS["admin"]['api_token']
api.api("logs", "", {log: {
- object_uuid: uuid,
+ object_uuid: log_uuid,
event_type: "stderr",
properties: {"text" => text}}})
assert_text '1000 hello'
old_top = page.evaluate_script("$('#event_log_div').scrollTop()")
api.api("logs", "", {log: {
- object_uuid: uuid,
+ object_uuid: log_uuid,
event_type: "stderr",
properties: {"text" => "1001 hello\n"}}})
assert_text '1001 hello'
assert_equal 30, page.evaluate_script("$('#event_log_div').scrollTop()")
api.api("logs", "", {log: {
- object_uuid: uuid,
+ object_uuid: log_uuid,
event_type: "stderr",
properties: {"text" => "1002 hello\n"}}})
assert_text '1002 hello'
# So we build this thing separately.
#
# Ward, 2016-03-17
-fpm_build schema_salad "" "" python 1.14.20160708181155
+fpm_build schema_salad "" "" python 1.16.20160810195039
# And schema_salad now depends on ruamel-yaml, which apparently has a braindead setup.py that requires special arguments to build (otherwise, it aborts with 'error: you have to install with "pip install ."'). Sigh.
# Ward, 2016-05-26
fpm_build ruamel.yaml "" "" python 0.11.11 --python-setup-py-arguments "--single-version-externally-managed"
# And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20160726135535
+fpm_build cwltool "" "" python 1.0.20160811184335
# FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
fpm_build rdflib-jsonld "" "" python 0.3.0
import os
import json
import argparse
+import re
from arvados.api import OrderedJsonModel
-from cwltool.process import adjustFileObjs
+from cwltool.process import adjustFileObjs, adjustDirObjs
from cwltool.load_tool import load_tool
# Print package versions
try:
job_order_object = arvados.current_job()['script_parameters']
+ pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')  # 32 lowercase hex digits, "+", decimal digits, then an optional "/<path>" suffix
+
def keeppath(v):
- if arvados.util.keep_locator_pattern.match(v):
+ if pdh_path.match(v):  # values matching pdh_path get a "keep:" prefix; everything else is returned unchanged
return "keep:%s" % v
else:
return v
}
adjustFileObjs(job_order_object, keeppathObj)
+ adjustDirObjs(job_order_object, keeppathObj)
runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()))
import sys
import threading
import hashlib
+from functools import partial
import pkg_resources # part of setuptools
from cwltool.errors import WorkflowException
from cwltool.process import shortname, UnsupportedRequirement
from cwltool.pathmapper import adjustFileObjs
+from cwltool.draft2tool import compute_checksums
from arvados.api import OrderedJsonModel
logger = logging.getLogger('arvados.cwl-runner')
def add_uploaded(self, src, pair):
self.uploaded[src] = pair
+ def check_writable(self, obj):  # walk nested dicts/lists and raise if any dict has a truthy "writable" entry
+ if isinstance(obj, dict):
+ if obj.get("writable"):
+ raise UnsupportedRequirement("InitialWorkDir feature 'writable: true' not supported")
+ for v in obj.itervalues():  # recurse into every value of the dict
+ self.check_writable(v)
+ if isinstance(obj, list):
+ for v in obj:  # recurse into every list element
+ self.check_writable(v)
+
def arvExecutor(self, tool, job_order, **kwargs):
self.debug = kwargs.get("debug")
+ tool.visit(self.check_writable)
+
if kwargs.get("quiet"):
logger.setLevel(logging.WARN)
logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
useruuid = self.api.users().current().execute()["uuid"]
self.project_uuid = kwargs.get("project_uuid") if kwargs.get("project_uuid") else useruuid
self.pipeline = None
- self.fs_access = CollectionFsAccess(kwargs["basedir"], api_client=self.api)
+ make_fs_access = kwargs.get("make_fs_access") or partial(CollectionFsAccess, api_client=self.api)
+ self.fs_access = make_fs_access(kwargs["basedir"])
if kwargs.get("create_template"):
tmpl = RunnerTemplate(self, tool, job_order, kwargs.get("enable_reuse"))
self.debug = kwargs.get("debug")
self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")
- kwargs["fs_access"] = self.fs_access
+ kwargs["make_fs_access"] = make_fs_access
kwargs["enable_reuse"] = kwargs.get("enable_reuse")
kwargs["use_container"] = True
kwargs["tmpdir_prefix"] = "tmp"
kwargs["outdir"] = "/var/spool/cwl"
kwargs["docker_outdir"] = "/var/spool/cwl"
kwargs["tmpdir"] = "/tmp"
+ kwargs["docker_tmpdir"] = "/tmp"
elif self.work_api == "jobs":
kwargs["outdir"] = "$(task.outdir)"
kwargs["docker_outdir"] = "$(task.outdir)"
raise WorkflowException("Workflow did not return a result.")
if kwargs.get("compute_checksum"):
- def compute_checksums(fileobj):
- if "checksum" not in fileobj:
- checksum = hashlib.sha1()
- with self.fs_access.open(fileobj["location"], "rb") as f:
- contents = f.read(1024*1024)
- while contents != "":
- checksum.update(contents)
- contents = f.read(1024*1024)
- fileobj["checksum"] = "sha1$%s" % checksum.hexdigest()
-
- adjustFileObjs(self.final_output, compute_checksums)
+ adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))
return self.final_output
executor=runner.arvExecutor,
makeTool=runner.arvMakeTool,
versionfunc=versionstring,
- job_order_object=job_order_object)
+ job_order_object=job_order_object,
+ make_fs_access=partial(CollectionFsAccess, api_client=api_client))
if self.generatefiles["listing"]:
raise UnsupportedRequirement("Generate files not supported")
- container_request["environment"] = {"TMPDIR": "/tmp"}
+ container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
if self.environment:
container_request["environment"].update(self.environment)
self.arvrunner.processes[response["container_uuid"]] = self
- logger.info("Container %s (%s) request state is %s", self.name, response["container_uuid"], response["state"])
+ logger.info("Container %s (%s) request state is %s", self.name, response["uuid"], response["state"])
if response["state"] == "Final":
self.done(response)
if p.type == "CreateFile":
script_parameters["task.vwd"][p.target] = "$(task.keep)/%s/%s" % (vwd.portable_data_hash(), p.target)
- script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
+ script_parameters["task.env"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
if self.environment:
script_parameters["task.env"].update(self.environment)
logger.info("Submitted job %s", response["uuid"])
if kwargs.get("submit"):
- self.pipeline = self.arvrunner.api.pipeline_instances().create(
+ self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().create(
body={
"owner_uuid": self.arvrunner.project_uuid,
"name": shortname(self.tool.tool["id"]),
elif self.work_api == "jobs":
kwargs["outdir"] = "$(task.outdir)"
kwargs["docker_outdir"] = "$(task.outdir)"
+ kwargs["tmpdir"] = "$(task.tmpdir)"
+ kwargs["docker_tmpdir"] = "$(task.tmpdir)"
return super(ArvadosCommandTool, self).job(joborder, output_callback, **kwargs)
import fnmatch
import os
-import cwltool.process
+import cwltool.stdfsaccess
from cwltool.pathmapper import abspath
import arvados.util
import arvados.collection
import arvados.arvfile
-class CollectionFsAccess(cwltool.process.StdFsAccess):
+class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
"""Implement the cwltool FsAccess interface for Arvados Collections."""
def __init__(self, basedir, api_client=None):
def listdir(self, fn): # type: (unicode) -> List[unicode]
collection, rest = self.get_collection(fn)
- if rest:
- dir = collection.find(rest)
- else:
- dir = collection
if collection:
+ if rest:  # the sub-path lookup now sits after the "if collection" check instead of before it
+ dir = collection.find(rest)
+ else:
+ dir = collection  # no sub-path: list the collection itself
return [abspath(l, fn) for l in dir.keys()]
else:
return super(CollectionFsAccess, self).listdir(fn)
if paths and paths[-1].startswith("keep:") and arvados.util.keep_locator_pattern.match(paths[-1][5:]):
return paths[-1]
return os.path.join(path, *paths)
+
+ def realpath(self, path):  # return path unchanged for task placeholders and collection paths, else defer to os.path.realpath
+ if path.startswith("$(task.tmpdir)") or path.startswith("$(task.outdir)"):  # "$(task.tmpdir)" / "$(task.outdir)" prefixes are passed through as-is
+ return path
+ collection, rest = self.get_collection(path)
+ if collection:  # get_collection found a collection for this path
+ return path
+ else:
+ return os.path.realpath(path)
elif srcobj["class"] == "Directory":
if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
self._pathmap[src] = MapperEnt(src, self.collection_pattern % src[5:], "Directory")
- else:
- for l in srcobj["listing"]:
- self.visit(l, uploadfiles)
+ for l in srcobj["listing"]:
+ self.visit(l, uploadfiles)
def addentry(self, obj, c, path, subdirs):
if obj["location"] in self._pathmap:
# Make sure to update arvados/build/run-build-packages.sh as well
# when updating the cwltool version pin.
install_requires=[
- 'cwltool==1.0.20160726135535',
+ 'cwltool==1.0.20160811184335',
'arvados-python-client>=0.1.20160714204738',
],
data_files=[
--- /dev/null
+#!/bin/sh
+# Make sure the test input collection is available: when arv-get cannot
+# fetch it, upload testdir with arv-put. Then hand off to cwltest.
+if ! arv-get d7514270f356df848477718d58308cc4+94 > /dev/null ; then
+ arv-put --portable-data-hash testdir
+fi
+# $PWD is quoted so that a working directory containing whitespace or glob
+# characters reaches --tool as a single argument.
+exec cwltest --test arvados-tests.yml --tool "$PWD/runner.sh"
--- /dev/null
+- job: dir-job.yml
+ output:
+ "outlist": {
+ "size": 20,
+ "location": "output.txt",
+ "class": "File",
+ "checksum": "sha1$13cda8661796ae241da3a18668fb552161a72592"
+ }
+ tool: keep-dir-test-input.cwl
+ doc: Test directory in keep
--- /dev/null
+indir:
+ class: Directory
+ location: keep:d7514270f356df848477718d58308cc4+94
\ No newline at end of file
--- /dev/null
+class: CommandLineTool
+cwlVersion: v1.0
+requirements:
+ - class: ShellCommandRequirement  # the arguments below use shellQuote: false for "&&" and "|"
+inputs:
+ indir:
+ type: Directory
+ inputBinding:
+ prefix: cd
+ position: -1  # negative position; presumably places "cd <indir>" ahead of the arguments list -- confirm against the CWL spec
+outputs:
+ outlist:
+ type: File
+ outputBinding:
+ glob: output.txt  # same file name as the stdout capture below
+arguments: [
+ {shellQuote: false, valueFrom: "&&"},
+ "find", ".",
+ {shellQuote: false, valueFrom: "|"},
+ "sort"]
+stdout: output.txt
\ No newline at end of file
--- /dev/null
+#!/bin/sh
+exec arvados-cwl-runner --disable-reuse --compute-checksum "$@"  # replace this shell with arvados-cwl-runner, forwarding every argument unchanged
import mock
import unittest
import os
+import functools
import cwltool.process
if not os.getenv('ARVADOS_DEBUG'):
"baseCommand": "ls",
"arguments": [{"valueFrom": "$(runtime.outdir)"}]
}
- arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names, basedir="")
+ make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
+ arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
+ basedir="", make_fs_access=make_fs_access)
arvtool.formatgraph = None
- for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run"):
+ for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_run",
+ make_fs_access=make_fs_access, tmpdir="/tmp"):
j.run()
runner.api.container_requests().create.assert_called_with(
body={
'environment': {
+ 'HOME': '/var/spool/cwl',
'TMPDIR': '/tmp'
},
'name': 'test_run',
}],
"baseCommand": "ls"
}
- arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names)
+ make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
+ arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
+ avsc_names=avsc_names, make_fs_access=make_fs_access)
arvtool.formatgraph = None
- for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements"):
+ for j in arvtool.job({}, mock.MagicMock(), basedir="", name="test_resource_requirements",
+ make_fs_access=make_fs_access, tmpdir="/tmp"):
j.run()
runner.api.container_requests().create.assert_called_with(
body={
'environment': {
+ 'HOME': '/var/spool/cwl',
'TMPDIR': '/tmp'
},
'name': 'test_resource_requirements',
import mock
import unittest
import os
+import functools
import cwltool.process
if not os.getenv('ARVADOS_DEBUG'):
"baseCommand": "ls",
"arguments": [{"valueFrom": "$(runtime.outdir)"}]
}
- arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names, basedir="")
+ make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
+ arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names, basedir="", make_fs_access=make_fs_access)
arvtool.formatgraph = None
- for j in arvtool.job({}, mock.MagicMock(), basedir=""):
+ for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run()
runner.api.jobs().create.assert_called_with(
body={
'runtime_constraints': {},
'script_parameters': {
'tasks': [{
- 'task.env': {'TMPDIR': '$(task.tmpdir)'},
+ 'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
'command': ['ls', '$(task.outdir)']
}],
},
}],
"baseCommand": "ls"
}
- arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names)
+ make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
+ arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names, make_fs_access=make_fs_access)
arvtool.formatgraph = None
- for j in arvtool.job({}, mock.MagicMock(), basedir=""):
+ for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run()
runner.api.jobs().create.assert_called_with(
body={
'runtime_constraints': {},
'script_parameters': {
'tasks': [{
- 'task.env': {'TMPDIR': '$(task.tmpdir)'},
+ 'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
'command': ['ls']
}]
},
api_column_map
end
+ def self.ignored_select_attributes  # attribute names that columns_for_attributes skips when validating a select list
+ ["href", "kind", "etag"]
+ end
+
def self.columns_for_attributes(select_attributes)
+ if select_attributes.empty?  # an empty selection list is rejected with ArgumentError
+ raise ArgumentError.new("Attribute selection list cannot be empty")
+ end
+ api_column_map = attributes_required_columns
+ invalid_attrs = []
+ select_attributes.each do |s|
+ next if ignored_select_attributes.include? s  # ignored names are not validated
+ if not s.is_a? String or not api_column_map.include? s  # collect non-String entries and names missing from api_column_map
+ invalid_attrs << s
+ end
+ end
+ if not invalid_attrs.empty?  # report all offending entries in a single error
+ raise ArgumentError.new("Invalid attribute(s): #{invalid_attrs.inspect}")
+ end
# Given an array of attribute names to select, return an array of column
# names that must be fetched from the database to satisfy the request.
- api_column_map = attributes_required_columns
select_attributes.flat_map { |attr| api_column_map[attr] }.uniq
end
)
end
+ def self.ignored_select_attributes  # inherited ignore list plus two more names
+ super + ["updated_at", "file_names"]
+ end
+
FILE_TOKEN = /^[[:digit:]]+:[[:digit:]]+:/
def check_signatures
return false if self.manifest_text.nil?
super - ["manifest_text"]
end
+ def logged_attributes  # returns a copy of attributes with 'manifest_text' removed
+ attrs = attributes.dup  # dup so the delete below does not touch the original hash
+ attrs.delete('manifest_text')
+ attrs
+ end
+
protected
def portable_manifest_text
self.class.munge_manifest_locators(manifest_text) do |match|
assert_equal "arvados#collectionList", json_response['kind']
end
+ test "get index with select= (valid attribute)" do
+ get "/arvados/v1/collections", {
+ :format => :json,
+ :select => ['portable_data_hash'].to_json
+ }, auth(:active)
+ assert_response :success
+ assert json_response['items'][0].keys.include?('portable_data_hash')  # the selected attribute is present in the first item
+ assert not(json_response['items'][0].keys.include?('uuid'))  # an attribute left out of select= is absent
+ end
+
+ test "get index with select= (invalid attribute) responds 422" do
+ get "/arvados/v1/collections", {
+ :format => :json,
+ :select => ['bogus'].to_json
+ }, auth(:active)
+ assert_response 422
+ assert_match /Invalid attribute.*bogus/, json_response['errors'].join(' ')  # the error text must name the rejected attribute
+ end
+
+ test "get index with select= (invalid attribute type) responds 422" do
+ get "/arvados/v1/collections", {
+ :format => :json,
+ :select => [['bogus']].to_json  # nested array: the list element is an Array rather than a String
+ }, auth(:active)
+ assert_response 422
+ assert_match /Invalid attribute.*bogus/, json_response['errors'].join(' ')
+ end
+
test "controller 404 response is json" do
get "/arvados/v1/thingsthatdonotexist", {:format => :xml}, auth(:active)
assert_response 404
yield props if block_given?
end
- def assert_auth_logged_with_clean_properties(auth, event_type)
- assert_logged(auth, event_type) do |props|
+ def assert_logged_with_clean_properties(obj, event_type, excluded_attr)
+ assert_logged(obj, event_type) do |props|
['old_attributes', 'new_attributes'].map { |k| props[k] }.compact
.each do |attributes|
- refute_includes(attributes, 'api_token',
- "auth log properties include sensitive API token")
+ refute_includes(attributes, excluded_attr,
+ "log properties includes #{excluded_attr}")
end
yield props if block_given?
end
auth.user = users(:spectator)
auth.api_client = api_clients(:untrusted)
auth.save!
- assert_auth_logged_with_clean_properties(auth, :create)
+ assert_logged_with_clean_properties(auth, :create, 'api_token')
auth.expires_at = Time.now
auth.save!
- assert_auth_logged_with_clean_properties(auth, :update)
+ assert_logged_with_clean_properties(auth, :update, 'api_token')
auth.destroy
- assert_auth_logged_with_clean_properties(auth, :destroy)
+ assert_logged_with_clean_properties(auth, :destroy, 'api_token')
end
test "use ownership and permission links to determine which logs a user can see" do
refute_includes result_ids, logs(notwant).id
end
end
+
+ test "manifest_text not included in collection logs" do  # checks the create, update and destroy log entries in turn
+ act_as_system_user do
+ coll = Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
+ assert_logged_with_clean_properties(coll, :create, 'manifest_text')
+ coll.name = "testing"  # change an attribute so that save! below is an update
+ coll.save!
+ assert_logged_with_clean_properties(coll, :update, 'manifest_text')
+ coll.destroy
+ assert_logged_with_clean_properties(coll, :destroy, 'manifest_text')
+ end
+ end
end
// Config used by crunch-dispatch-slurm
type Config struct {
+ arvados.Client
+
SbatchArguments []string
PollPeriod arvados.Duration
config.PollPeriod = arvados.Duration(10 * time.Second)
}
+ if config.Client.APIHost != "" || config.Client.AuthToken != "" {
+ // Copy real configs into env vars so [a]
+ // MakeArvadosClient() uses them, and [b] they get
+ // propagated to crunch-run via SLURM.
+ os.Setenv("ARVADOS_API_HOST", config.Client.APIHost)
+ os.Setenv("ARVADOS_API_TOKEN", config.Client.AuthToken)
+ os.Setenv("ARVADOS_API_INSECURE", "")
+ if config.Client.Insecure {
+ os.Setenv("ARVADOS_API_INSECURE", "1")
+ }
+ os.Setenv("ARVADOS_KEEP_SERVICES", "")
+ os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
+ } else {
+ log.Printf("warning: Client credentials missing from config, so falling back on environment variables (deprecated).")
+ }
+
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
log.Printf("Error making Arvados client: %v", err)
var exampleConfigFile = []byte(`
{
+ "Client": {
+ "APIHost": "zzzzz.arvadosapi.com",
+ "AuthToken": "xyzzy",
+ "Insecure": false
+ },
"CrunchRunCommand": ["crunch-run"],
"PollPeriod": "10s",
"SbatchArguments": ["--partition=foo", "--exclude=node13"]