crunchrunner_re = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.(tmpdir|outdir|keep)\)=(.*)")
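+# Pin crunchrunner to a specific commit; the reuse filters and job specs below reference this same version.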
+crunchrunner_git_commit = 'a3f2cb186e437bfce0031b024b2157b73ed2717d'
+
class ArvadosJob(object):
"""Submit and manage a Crunch job for executing a CWL CommandLineTool."""
filters = [["repository", "=", "arvados"],
["script", "=", "crunchrunner"],
- ["script_version", "in git", "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"]]
+ ["script_version", "in git", crunchrunner_git_commit]]
if not self.arvrunner.ignore_docker_for_reuse:
filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
"script": "crunchrunner",
"repository": "arvados",
"script_version": "master",
- "minimum_script_version": "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
+ "minimum_script_version": crunchrunner_git_commit,
"script_parameters": {"tasks": [script_parameters]},
"runtime_constraints": runtime_constraints
},
return {
"script": "cwl-runner",
- "script_version": __version__,
+ "script_version": "master",
+ "minimum_script_version": "570509ab4d2ef93d870fd2b1f2eab178afb1bad9",
"repository": "arvados",
"script_parameters": self.job_order,
"runtime_constraints": {
args.quiet = False
args.ignore_docker_for_reuse = False
args.basedir = os.getcwd()
+ args.name = None
args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
outputObj = runner.arv_executor(t, job_order_object, **vars(args))
except Exception as e:
import cwltool.process
from schema_salad.ref_resolver import Loader
from .mock_discovery import get_rootDesc
+from .matcher import JsonDiffMatcher
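+# JsonDiffMatcher explains assert_called_with() mismatches as readable JSON diffs.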
if not os.getenv('ARVADOS_DEBUG'):
logging.getLogger('arvados.cwl-runner').setLevel(logging.WARN)
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run(enable_reuse=enable_reuse)
runner.api.jobs().create.assert_called_with(
- body={
+ body=JsonDiffMatcher({
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'runtime_constraints': {},
'script_parameters': {
}],
},
'script_version': 'master',
- 'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+ 'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
'repository': 'arvados',
'script': 'crunchrunner',
'runtime_constraints': {
'min_ram_mb_per_node': 1024,
'min_scratch_mb_per_node': 2048 # tmpdirSize + outdirSize
}
- },
+ }),
find_or_create=enable_reuse,
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
- ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+ ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
['docker_image_locator', 'in docker', 'arvados/jobs:'+arvados_cwl.__version__]]
)
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run()
runner.api.jobs().create.assert_called_with(
- body={
+ body=JsonDiffMatcher({
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'runtime_constraints': {},
'script_parameters': {
}]
},
'script_version': 'master',
- 'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+ 'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
'repository': 'arvados',
'script': 'crunchrunner',
'runtime_constraints': {
'min_scratch_mb_per_node': 5024, # tmpdirSize + outdirSize
'keep_cache_mb_per_task': 512
}
- },
+ }),
find_or_create=True,
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
- ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+ ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
['docker_image_locator', 'in docker', 'arvados/jobs:'+arvados_cwl.__version__]])
@mock.patch("arvados.collection.CollectionReader")
subwf = f.read()
runner.api.jobs().create.assert_called_with(
- body={
- 'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+ body=JsonDiffMatcher({
+ 'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
'repository': 'arvados',
'script_version': 'master',
'script': 'crunchrunner',
'docker_image': 'arvados/jobs:'+arvados_cwl.__version__,
'min_ram_mb_per_node': 1024
},
- 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'},
+ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
- ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+ ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
['docker_image_locator', 'in docker', 'arvados/jobs:'+arvados_cwl.__version__]],
find_or_create=True)
'99999999999999999999999999999991+99/wf/submit_wf.cwl'
},
'repository': 'arvados',
- 'script_version': arvados_cwl.__version__,
+ 'script_version': 'master',
+ 'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
'script': 'cwl-runner'
}
stubs.pipeline_component = stubs.expect_job_spec.copy()
'arv:enable_reuse': True
},
'repository': 'arvados',
- 'script_version': arvados_cwl.__version__,
+ 'script_version': 'master',
+ 'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
'script': 'cwl-runner',
'job': {'state': 'Queued', 'uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}
}
},
},
'repository': 'arvados',
- 'script_version': arvados_cwl.__version__,
+ 'script_version': 'master',
+ 'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
'script': 'cwl-runner',
},
},
ENV DEBIAN_FRONTEND noninteractive
-RUN apt-get update -q && apt-get install -qy git python-pip python-virtualenv python-dev libcurl4-gnutls-dev libgnutls28-dev nodejs
+RUN apt-get update -q && apt-get install -qy git python-pip python-virtualenv python-dev libcurl4-gnutls-dev libgnutls28-dev nodejs python-pyasn1-modules
RUN pip install -U setuptools
func (runner *ContainerRunner) UpdateContainerFinal() error {
update := arvadosclient.Dict{}
update["state"] = runner.finalState
+ if runner.LogsPDH != nil {
+ update["log"] = *runner.LogsPDH
+ }
if runner.finalState == "Complete" {
- if runner.LogsPDH != nil {
- update["log"] = *runner.LogsPDH
- }
if runner.ExitCode != nil {
update["exit_code"] = *runner.ExitCode
}
}
var deadline time.Time
haveDeadline := false
- size, err := v.get(loc, buf)
+ size, err := v.get(ctx, loc, buf)
for err == nil && size == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
// Seeing a brand new empty block probably means we're
// in a race with CreateBlob, which under the hood
} else if time.Now().After(deadline) {
break
}
- time.Sleep(azureWriteRacePollTime)
- size, err = v.get(loc, buf)
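+ // Wait out the poll interval, but return immediately if the caller's context is cancelled.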
+ select {
+ case <-ctx.Done():
+ return 0, ctx.Err()
+ case <-time.After(azureWriteRacePollTime):
+ }
+ size, err = v.get(ctx, loc, buf)
}
if haveDeadline {
log.Printf("Race ended with size==%d", size)
}
return size, err
}
-func (v *AzureBlobVolume) get(loc string, buf []byte) (int, error) {
+func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int, error) {
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
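+ // Cancelling ctx aborts all in-flight piece reads as soon as one piece fails or the caller hangs up.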
expectSize := len(buf)
if azureMaxGetBytes < BlockSize {
// Unfortunately the handler doesn't tell us how long the blob
// We'll update this actualSize if/when we get the last piece.
actualSize := -1
pieces := (expectSize + azureMaxGetBytes - 1) / azureMaxGetBytes
- errors := make([]error, pieces)
+ errors := make(chan error, pieces)
var wg sync.WaitGroup
wg.Add(pieces)
for p := 0; p < pieces; p++ {
+ // Each goroutine retrieves one piece. If we hit an
+ // error, it is sent to the errors chan so get() can
+ // return it -- but only if the error happens before
+ // ctx is done. This way, if ctx is done before we hit
+ // any other error (e.g., requesting client has hung
+ // up), we return the original ctx.Err() instead of
+ // the secondary errors from the transfers that got
+ // interrupted as a result.
go func(p int) {
defer wg.Done()
startPos := p * azureMaxGetBytes
}
var rdr io.ReadCloser
var err error
- if startPos == 0 && endPos == expectSize {
- rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
- } else {
- rdr, err = v.bsClient.GetBlobRange(v.ContainerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
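+ // The storage SDK calls take no context, so run them in a goroutine: if ctx is cancelled first, abandon the call and close the reader whenever it eventually returns.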
+ gotRdr := make(chan struct{})
+ go func() {
+ defer close(gotRdr)
+ if startPos == 0 && endPos == expectSize {
+ rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
+ } else {
+ rdr, err = v.bsClient.GetBlobRange(v.ContainerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
+ }
+ }()
+ select {
+ case <-ctx.Done():
+ go func() {
+ <-gotRdr
+ if err == nil {
+ rdr.Close()
+ }
+ }()
+ return
+ case <-gotRdr:
}
if err != nil {
- errors[p] = err
+ errors <- err
+ cancel()
return
}
- defer rdr.Close()
+ go func() {
+ // Close the reader when the client
+ // hangs up or another piece fails
+ // (possibly interrupting ReadFull())
+ // or when all pieces succeed and
+ // get() returns.
+ <-ctx.Done()
+ rdr.Close()
+ }()
n, err := io.ReadFull(rdr, buf[startPos:endPos])
if pieces == 1 && (err == io.ErrUnexpectedEOF || err == io.EOF) {
// If we don't know the actual size,
// and just tried reading 64 MiB, it's
// normal to encounter EOF.
} else if err != nil {
- errors[p] = err
+ if ctx.Err() == nil {
+ errors <- err
+ }
+ cancel()
+ return
}
if p == pieces-1 {
actualSize = startPos + n
}
}(p)
}
wg.Wait()
- for _, err := range errors {
- if err != nil {
- return 0, v.translateError(err)
- }
+ close(errors)
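+ // errors is buffered, so len() reports whether any piece failed; return the first failure.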
+ if len(errors) > 0 {
+ return 0, v.translateError(<-errors)
+ }
+ if ctx.Err() != nil {
+ return 0, ctx.Err()
}
return actualSize, nil
}
if trashed {
return os.ErrNotExist
}
- rdr, err := v.bsClient.GetBlob(v.ContainerName, loc)
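+ // Same pattern as get(): run GetBlob in a goroutine so a cancelled context doesn't leave Compare blocked.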
+ var rdr io.ReadCloser
+ gotRdr := make(chan struct{})
+ go func() {
+ defer close(gotRdr)
+ rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
+ }()
+ select {
+ case <-ctx.Done():
+ go func() {
+ <-gotRdr
+ if err == nil {
+ rdr.Close()
+ }
+ }()
+ return ctx.Err()
+ case <-gotRdr:
+ }
if err != nil {
return v.translateError(err)
}
if v.ReadOnly {
return MethodDisabledError
}
- return v.bsClient.CreateBlockBlobFromReader(v.ContainerName, loc, uint64(len(block)), bytes.NewReader(block), nil)
+ // Send the block data through a pipe, so that (if we need to)
+ // we can close the pipe early and abandon our
+ // CreateBlockBlobFromReader() goroutine, without worrying
+ // about CreateBlockBlobFromReader() accessing our block
+ // buffer after we release it.
+ bufr, bufw := io.Pipe()
+ go func() {
+ io.Copy(bufw, bytes.NewReader(block))
+ bufw.Close()
+ }()
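+ // Run the upload in its own goroutine so we can stop waiting, and abandon it, if ctx is cancelled first.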
+ errChan := make(chan error)
+ go func() {
+ errChan <- v.bsClient.CreateBlockBlobFromReader(v.ContainerName, loc, uint64(len(block)), bufr, nil)
+ }()
+ select {
+ case <-ctx.Done():
+ theConfig.debugLogf("%s: taking CreateBlockBlobFromReader's input away: %s", v, ctx.Err())
+ // Our pipe might be stuck in Write(), waiting for
+ // io.Copy() to read. If so, un-stick it. This means
+ // CreateBlockBlobFromReader will get corrupt data,
+ // but that's OK: the size won't match, so the write
+ // will fail.
+ go io.Copy(ioutil.Discard, bufr)
+ // CloseWithError() will return once pending I/O is done.
+ bufw.CloseWithError(ctx.Err())
+ theConfig.debugLogf("%s: abandoning CreateBlockBlobFromReader goroutine", v)
+ return ctx.Err()
+ case err := <-errChan:
+ return err
+ }
}
// Touch updates the last-modified property of a block blob.
}
}
+func TestAzureBlobVolumeContextCancelGet(t *testing.T) {
+ testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+ v.PutRaw(TestHash, TestBlock)
+ _, err := v.Get(ctx, TestHash, make([]byte, BlockSize))
+ return err
+ })
+}
+
+func TestAzureBlobVolumeContextCancelPut(t *testing.T) {
+ testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+ return v.Put(ctx, TestHash, make([]byte, BlockSize))
+ })
+}
+
+func TestAzureBlobVolumeContextCancelCompare(t *testing.T) {
+ testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+ v.PutRaw(TestHash, TestBlock)
+ return v.Compare(ctx, TestHash, TestBlock2)
+ })
+}
+
+func testAzureBlobVolumeContextCancel(t *testing.T, testFunc func(context.Context, *TestableAzureBlobVolume) error) {
+ defer func(t http.RoundTripper) {
+ http.DefaultTransport = t
+ }(http.DefaultTransport)
+ http.DefaultTransport = &http.Transport{
+ Dial: (&azStubDialer{}).Dial,
+ }
+
+ v := NewTestableAzureBlobVolume(t, false, 3)
+ defer v.Teardown()
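+ // Arm the stub handler's race channel: each request will block until the test releases it.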
+ v.azHandler.race = make(chan chan struct{})
+
+ ctx, cancel := context.WithCancel(context.Background())
+ allDone := make(chan struct{})
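+ // Run the operation under test in the background; it should block in the stub handler until we cancel.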
+ go func() {
+ defer close(allDone)
+ err := testFunc(ctx, v)
+ if err != context.Canceled {
+ t.Errorf("got %T %q, expected %q", err, err, context.Canceled)
+ }
+ }()
+ releaseHandler := make(chan struct{})
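+ // Wait until the request reaches the stub handler, so cancellation happens while the operation is in flight.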
+ select {
+ case <-allDone:
+ t.Error("testFunc finished without waiting for v.azHandler.race")
+ case <-time.After(10 * time.Second):
+ t.Error("timed out waiting to enter handler")
+ case v.azHandler.race <- releaseHandler:
+ }
+
+ cancel()
+
+ select {
+ case <-time.After(10 * time.Second):
+ t.Error("timed out waiting to cancel")
+ case <-allDone:
+ }
+
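+ // Unblock the waiting stub handler so its goroutine can exit.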
+ go func() {
+ <-releaseHandler
+ }()
+}
+
func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
v.azHandler.PutRaw(v.ContainerName, locator, data)
}