sdk/cli
sdk/pam
sdk/python
+sdk/python:py3
sdk/ruby
sdk/go/arvados
sdk/go/arvadosclient
gcc --version | egrep ^gcc \
|| fatal "No gcc. Try: apt-get install build-essential"
echo -n 'fuse.h: '
- find /usr/include -wholename '*fuse/fuse.h' \
+ find /usr/include -path '*fuse/fuse.h' | egrep --max-count=1 . \
|| fatal "No fuse/fuse.h. Try: apt-get install libfuse-dev"
echo -n 'gnutls.h: '
- find /usr/include -wholename '*gnutls/gnutls.h' \
+ find /usr/include -path '*gnutls/gnutls.h' | egrep --max-count=1 . \
|| fatal "No gnutls/gnutls.h. Try: apt-get install libgnutls28-dev"
- echo -n 'pyconfig.h: '
- find /usr/include -name pyconfig.h | egrep --max-count=1 . \
- || fatal "No pyconfig.h. Try: apt-get install python-dev"
+ echo -n 'Python2 pyconfig.h: '
+ find /usr/include -path '*/python2*/pyconfig.h' | egrep --max-count=1 . \
+ || fatal "No Python2 pyconfig.h. Try: apt-get install python2.7-dev"
+ echo -n 'Python3 pyconfig.h: '
+ find /usr/include -path '*/python3*/pyconfig.h' | egrep --max-count=1 . \
+ || fatal "No Python3 pyconfig.h. Try: apt-get install python3-dev"
echo -n 'nginx: '
PATH="$PATH:/sbin:/usr/sbin:/usr/local/sbin" nginx -v \
|| fatal "No nginx. Try: apt-get install nginx"
# Deactivate Python 2 virtualenv
deactivate
+declare -a pythonstuff
+pythonstuff=(
+ sdk/pam
+ sdk/python
+ sdk/python:py3
+ sdk/cwl
+ services/dockercleaner:py3
+ services/fuse
+ services/nodemanager
+ tools/crunchstat-summary
+ )
+
# If Python 3 is available, set up its virtualenv in $VENV3DIR.
# Otherwise, skip dependent tests.
PYTHON3=$(which python3)
-if [ "0" = "$?" ]; then
+if [[ ${?} = 0 ]]; then
setup_virtualenv "$VENV3DIR" --python python3
else
PYTHON3=
- skip[services/dockercleaner]=1
cat >&2 <<EOF
-Warning: python3 could not be found
-services/dockercleaner install and tests will be skipped
+Warning: python3 could not be found. Python 3 tests will be skipped.
EOF
fi
# module. We can't actually *test* the Python SDK yet though, because
# its own test suite brings up some of those other programs (like
# keepproxy).
-declare -a pythonstuff
-pythonstuff=(
- sdk/pam
- sdk/python
- sdk/cwl
- services/fuse
- services/nodemanager
- tools/crunchstat-summary
- )
for p in "${pythonstuff[@]}"
do
- do_install "$p" pip
+ dir=${p%:py3}
+ if [[ ${dir} = ${p} ]]; then
+ if [[ -z ${skip[python2]} ]]; then
+ do_install ${dir} pip
+ fi
+ elif [[ -n ${PYTHON3} ]]; then
+ if [[ -z ${skip[python3]} ]]; then
+ do_install ${dir} pip "$VENV3DIR/bin/"
+ fi
+ fi
done
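# Illustration of the ":py3" suffix handling above (a minimal sketch; the
# entries come from the pythonstuff array):
#   p="services/dockercleaner:py3"; dir=${p%:py3}  # dir="services/dockercleaner" -> installed with "$VENV3DIR/bin/"
#   p="sdk/python";                 dir=${p%:py3}  # suffix absent, dir == p      -> installed with Python 2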
-if [ -n "$PYTHON3" ]; then
- do_install services/dockercleaner pip "$VENV3DIR/bin/"
-fi
install_apiserver() {
cd "$WORKSPACE/services/api" \
if [ -n "$CONFIGSRC" ]
then
- for f in database.yml application.yml
+ for f in database.yml
do
cp "$CONFIGSRC/$f" config/ || fatal "$f"
done
fi
- # Fill in a random secret_token and blob_signing_key for testing
- SECRET_TOKEN=`echo 'puts rand(2**512).to_s(36)' |ruby`
- BLOB_SIGNING_KEY=`echo 'puts rand(2**512).to_s(36)' |ruby`
-
- sed -i'' -e "s:SECRET_TOKEN:$SECRET_TOKEN:" config/application.yml
- sed -i'' -e "s:BLOB_SIGNING_KEY:$BLOB_SIGNING_KEY:" config/application.yml
-
- # Set up empty git repo (for git tests)
- GITDIR=$(mktemp -d)
- sed -i'' -e "s:/var/cache/git:$GITDIR:" config/application.default.yml
-
- rm -rf $GITDIR
- mkdir -p $GITDIR/test
- cd $GITDIR/test \
- && git init \
- && git config user.email "jenkins@ci.curoverse.com" \
- && git config user.name "Jenkins, CI" \
- && touch tmp \
- && git add tmp \
- && git commit -m 'initial commit'
-
# Clear out any lingering postgresql connections to the test
# database, so that we can drop it. This assumes the current user
# is a postgresql superuser.
&& test_database=$(python -c "import yaml; print yaml.load(file('config/database.yml'))['test']['database']") \
&& psql "$test_database" -c "SELECT pg_terminate_backend (pg_stat_activity.procpid::int) FROM pg_stat_activity WHERE pg_stat_activity.datname = '$test_database';" 2>/dev/null
+ mkdir -p "$WORKSPACE/services/api/tmp/pids"
+
cd "$WORKSPACE/services/api" \
&& RAILS_ENV=test bundle exec rake db:drop \
&& RAILS_ENV=test bundle exec rake db:setup \
}
do_install apps/workbench workbench
+unset http_proxy https_proxy no_proxy
+
test_doclinkchecker() {
(
set -e
for p in "${pythonstuff[@]}"
do
- do_test "$p" pip
+ dir=${p%:py3}
+ if [[ ${dir} = ${p} ]]; then
+ if [[ -z ${skip[python2]} ]]; then
+ do_test ${dir} pip
+ fi
+ elif [[ -n ${PYTHON3} ]]; then
+ if [[ -z ${skip[python3]} ]]; then
+ do_test ${dir} pip "$VENV3DIR/bin/"
+ fi
+ fi
done
-do_test services/dockercleaner pip "$VENV3DIR/bin/"
for g in "${gostuff[@]}"
do
file "sdk/python/arvados/index.html" do |t|
`which epydoc`
if $? == 0
- `epydoc --html --parse-only -o sdk/python/arvados ../sdk/python/arvados/`
+ STDERR.puts `epydoc --html --parse-only -o sdk/python/arvados ../sdk/python/arvados/ 2>&1`
+ raise if $? != 0
else
puts "Warning: epydoc not found, Python documentation will not be generated".colorize(:light_red)
end
-import httplib
+from __future__ import print_function
+from __future__ import absolute_import
+from future import standard_library
+standard_library.install_aliases()
+from builtins import object
+import bz2
+import fcntl
+import hashlib
+import http.client
import httplib2
+import json
import logging
import os
import pprint
-import sys
-import types
-import subprocess
-import json
-import UserDict
import re
-import hashlib
import string
-import bz2
-import zlib
-import fcntl
-import time
+import subprocess
+import sys
import threading
+import time
+import types
+import zlib
+
+if sys.version_info >= (3, 0):
+ from collections import UserDict
+else:
+ from UserDict import UserDict
-from .api import api, http_cache
-from collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
-from keep import *
-from stream import *
-from arvfile import StreamFileReader
-from retry import RetryLoop
-import errors
-import util
+from .api import api, api_from_config, http_cache
+from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
+from arvados.keep import *
+from arvados.stream import *
+from .arvfile import StreamFileReader
+from .retry import RetryLoop
+import arvados.errors as errors
+import arvados.util as util
# Set up Arvados logging based on the user's configuration.
# All Arvados code should log under the arvados hierarchy.
for tries_left in RetryLoop(num_retries=num_retries, backoff_start=2):
try:
task = api('v1').job_tasks().get(uuid=os.environ['TASK_UUID']).execute()
- task = UserDict.UserDict(task)
+ task = UserDict(task)
task.set_output = types.MethodType(task_set_output, task)
task.tmpdir = os.environ['TASK_WORK']
_current_task = task
for tries_left in RetryLoop(num_retries=num_retries, backoff_start=2):
try:
job = api('v1').jobs().get(uuid=os.environ['JOB_UUID']).execute()
- job = UserDict.UserDict(job)
+ job = UserDict(job)
job.tmpdir = os.environ['JOB_WORK']
_current_job = job
return job
class JobTask(object):
def __init__(self, parameters=dict(), runtime_constraints=dict()):
- print "init jobtask %s %s" % (parameters, runtime_constraints)
+ print("init jobtask %s %s" % (parameters, runtime_constraints))
-class job_setup:
+class job_setup(object):
@staticmethod
def one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=False, api_client=None):
if if_sequence != current_task()['sequence']:
-import config
+from __future__ import absolute_import
+from . import config
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
sortedfiles.sort()
blocks = {}
- streamoffset = 0L
+ streamoffset = 0
# Go through each file and add each referenced block exactly once.
for streamfile in sortedfiles:
for segment in stream[streamfile]:
+from __future__ import division
+from builtins import object
import logging
_logger = logging.getLogger('arvados.ranges')
self.segment_offset == other.segment_offset)
def first_block(data_locators, range_start):
- block_start = 0L
+ block_start = 0
# range_start/block_start is the inclusive lower bound
# range_end/block_end is the exclusive upper bound
hi = len(data_locators)
lo = 0
- i = int((hi + lo) / 2)
+ i = (hi + lo) // 2
block_size = data_locators[i].range_size
block_start = data_locators[i].range_start
block_end = block_start + block_size
lo = i
else:
hi = i
- i = int((hi + lo) / 2)
+ i = (hi + lo) // 2
block_size = data_locators[i].range_size
block_start = data_locators[i].range_start
block_end = block_start + block_size
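# Why "//": with "from __future__ import division" in effect, "/" is true
# (float) division, so the old int((hi + lo) / 2) would go through an
# intermediate float; "//" keeps the midpoint computation in integers, e.g.:
#   (7 + 2) // 2       # -> 4 (int)
#   int((7 + 2) / 2)   # -> 4, but via the float 4.5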
+from __future__ import absolute_import
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
import collections
-import httplib
+import http.client
import httplib2
import json
import logging
import apiclient
from apiclient import discovery as apiclient_discovery
from apiclient import errors as apiclient_errors
-import config
-import errors
-import util
-import cache
+from . import config
+from . import errors
+from . import util
+from . import cache
_logger = logging.getLogger('arvados.api')
# High probability of failure due to connection atrophy. Make
# sure this request [re]opens a new connection by closing and
# forgetting all cached connections first.
- for conn in self.connections.itervalues():
+ for conn in self.connections.values():
conn.close()
self.connections.clear()
self._last_request_time = time.time()
try:
return self.orig_http_request(uri, method, **kwargs)
- except httplib.HTTPException:
+ except http.client.HTTPException:
_logger.debug("Retrying API request in %d s after HTTP error",
delay, exc_info=True)
except socket.error:
# httplib2 reopens connections when needed.
_logger.debug("Retrying API request in %d s after socket error",
delay, exc_info=True)
- for conn in self.connections.itervalues():
+ for conn in self.connections.values():
conn.close()
time.sleep(delay)
delay = delay * self._retry_delay_backoff
def _cast_objects_too(value, schema_type):
global _cast_orig
if (type(value) != type('') and
+ type(value) != type(b'') and
(schema_type == 'object' or schema_type == 'array')):
return json.dumps(value)
else:
-import functools
-import os
-import zlib
+from __future__ import absolute_import
+from __future__ import division
+from future import standard_library
+from future.utils import listitems, listvalues
+standard_library.install_aliases()
+from builtins import range
+from builtins import object
import bz2
-import config
-import hashlib
-import threading
-import Queue
+import collections
import copy
import errno
-import re
+import functools
+import hashlib
import logging
-import collections
+import os
+import queue
+import re
+import sys
+import threading
import uuid
+import zlib
+from . import config
from .errors import KeepWriteError, AssertionError, ArgumentError
from .keep import KeepLocator
from ._normalize_stream import normalize_stream
class ArvadosFileReaderBase(_FileLikeObjectBase):
def __init__(self, name, mode, num_retries=None):
super(ArvadosFileReaderBase, self).__init__(name, mode)
- self._filepos = 0L
+ self._binary = 'b' in mode
+ if sys.version_info >= (3, 0) and not self._binary:
+ raise NotImplementedError("text mode {!r} is not implemented".format(mode))
+ self._filepos = 0
self.num_retries = num_retries
self._readline_cache = (None, None)
pos += self._filepos
elif whence == os.SEEK_END:
pos += self.size()
- if pos < 0L:
+ if pos < 0:
raise IOError(errno.EINVAL, "Tried to seek to negative file offset.")
self._filepos = pos
return self._filepos
def readall(self, size=2**20, num_retries=None):
while True:
data = self.read(size, num_retries=num_retries)
- if data == '':
+ if len(data) == 0:
break
yield data
data = [cache_data]
self._filepos += len(cache_data)
else:
- data = ['']
+ data = [b'']
data_size = len(data[-1])
- while (data_size < size) and ('\n' not in data[-1]):
+ while (data_size < size) and (b'\n' not in data[-1]):
next_read = self.read(2 ** 20, num_retries=num_retries)
if not next_read:
break
data.append(next_read)
data_size += len(next_read)
- data = ''.join(data)
+ data = b''.join(data)
try:
- nextline_index = data.index('\n') + 1
+ nextline_index = data.index(b'\n') + 1
except ValueError:
nextline_index = len(data)
nextline_index = min(nextline_index, size)
self._filepos -= len(data) - nextline_index
self._readline_cache = (self.tell(), data[nextline_index:])
- return data[:nextline_index]
+ return data[:nextline_index].decode()
@_FileLikeObjectBase._before_close
@retry_method
data_size += len(s)
if data_size >= sizehint:
break
- return ''.join(data).splitlines(True)
+ return b''.join(data).decode().splitlines(True)
def size(self):
raise IOError(errno.ENOSYS, "Not implemented")
def read(self, size, num_retries=None):
"""Read up to 'size' bytes from the stream, starting at the current file position"""
if size == 0:
- return ''
+ return b''
- data = ''
+ data = b''
available_chunks = locators_and_ranges(self.segments, self._filepos, size)
if available_chunks:
lr = available_chunks[0]
data = self._stream.readfrom(lr.locator+lr.segment_offset,
- lr.segment_size,
- num_retries=num_retries)
+ lr.segment_size,
+ num_retries=num_retries)
self._filepos += len(data)
return data
def readfrom(self, start, size, num_retries=None):
"""Read up to 'size' bytes from the stream, starting at 'start'"""
if size == 0:
- return ''
+ return b''
data = []
for lr in locators_and_ranges(self.segments, start, size):
data.append(self._stream.readfrom(lr.locator+lr.segment_offset, lr.segment_size,
num_retries=num_retries))
- return ''.join(data)
+ return b''.join(data)
def as_manifest(self):
segs = []
"""
if self._state == _BufferBlock.WRITABLE:
+ if not isinstance(data, bytes) and not isinstance(data, memoryview):
+ data = data.encode()
while (self.write_pointer+len(data)) > len(self.buffer_block):
new_buffer_block = bytearray(len(self.buffer_block) * 2)
new_buffer_block[0:self.write_pointer] = self.buffer_block[0:self.write_pointer]
# blocks pending. If they are full 64 MiB blocks, that means up to
# 256 MiB of internal buffering, which is the same size as the
# default download block cache in KeepClient.
- self._put_queue = Queue.Queue(maxsize=2)
+ self._put_queue = queue.Queue(maxsize=2)
self._put_threads = []
- for i in xrange(0, self.num_put_threads):
+ for i in range(0, self.num_put_threads):
thread = threading.Thread(target=self._commit_bufferblock_worker)
self._put_threads.append(thread)
thread.daemon = True
@synchronized
def start_get_threads(self):
if self._prefetch_threads is None:
- self._prefetch_queue = Queue.Queue()
+ self._prefetch_queue = queue.Queue()
self._prefetch_threads = []
- for i in xrange(0, self.num_get_threads):
+ for i in range(0, self.num_get_threads):
thread = threading.Thread(target=self._block_prefetch_worker)
self._prefetch_threads.append(thread)
thread.daemon = True
# A WRITABLE block with its owner.closed() implies that its
# size is <= KEEP_BLOCK_SIZE/2.
try:
- small_blocks = [b for b in self._bufferblocks.values() if b.state() == _BufferBlock.WRITABLE and b.owner.closed()]
+ small_blocks = [b for b in listvalues(self._bufferblocks) if b.state() == _BufferBlock.WRITABLE and b.owner.closed()]
except AttributeError:
# Writable blocks without owner shouldn't exist.
raise UnownedBlockError()
self.repack_small_blocks(force=True, sync=True)
with self.lock:
- items = self._bufferblocks.items()
+ items = listitems(self._bufferblocks)
for k,v in items:
if v.state() != _BufferBlock.COMMITTED and v.owner:
with self.lock:
if len(self._segments) != len(othersegs):
return False
- for i in xrange(0, len(othersegs)):
+ for i in range(0, len(othersegs)):
seg1 = self._segments[i]
seg2 = othersegs[i]
loc1 = seg1.locator
"""
self._writers.remove(writer)
- if flush or self.size() > config.KEEP_BLOCK_SIZE / 2:
+ if flush or self.size() > config.KEEP_BLOCK_SIZE // 2:
# File writer closed, not small enough for repacking
self.flush()
elif self.closed():
with self.lock:
if size == 0 or offset >= self.size():
- return ''
+ return b''
readsegs = locators_and_ranges(self._segments, offset, size)
prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
self.parent._my_block_manager().block_prefetch(lr.locator)
locs.add(lr.locator)
- return ''.join(data)
+ return b''.join(data)
def _repack_writes(self, num_retries):
"""Optimize buffer block by repacking segments in file sequence.
necessary.
"""
+ if not isinstance(data, bytes) and not isinstance(data, memoryview):
+ data = data.encode()
if len(data) == 0:
return
"""
- def __init__(self, arvadosfile, num_retries=None):
- super(ArvadosFileReader, self).__init__(arvadosfile.name, "r", num_retries=num_retries)
+ def __init__(self, arvadosfile, mode="r", num_retries=None):
+ super(ArvadosFileReader, self).__init__(arvadosfile.name, mode=mode, num_retries=num_retries)
self.arvadosfile = arvadosfile
def size(self):
data.append(rd)
self._filepos += len(rd)
rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
- return ''.join(data)
+ return b''.join(data)
else:
data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
self._filepos += len(data)
"""
def __init__(self, arvadosfile, mode, num_retries=None):
- super(ArvadosFileWriter, self).__init__(arvadosfile, num_retries=num_retries)
- self.mode = mode
+ super(ArvadosFileWriter, self).__init__(arvadosfile, mode=mode, num_retries=num_retries)
self.arvadosfile.add_writer(self)
def writable(self):
+from builtins import object
import errno
-import md5
+import hashlib
import os
import tempfile
import time
return self._dir
def _filename(self, url):
- return os.path.join(self._dir, md5.new(url).hexdigest()+'.tmp')
+ return os.path.join(self._dir, hashlib.md5(url.encode('utf-8')).hexdigest()+'.tmp')
def get(self, url):
filename = self._filename(url)
try:
with open(filename, 'rb') as f:
return f.read()
- except IOError, OSError:
+ except (IOError, OSError):
return None
def set(self, url, content):
return None
try:
try:
- f = os.fdopen(fd, 'w')
+ f = os.fdopen(fd, 'wb')
except:
os.close(fd)
raise
+from __future__ import absolute_import
+from future.utils import listitems, listvalues, viewkeys
+from builtins import str
+from past.builtins import basestring
+from builtins import object
import functools
import logging
import os
from stat import *
from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, NoopLock
-from keep import KeepLocator, KeepClient
+from .keep import KeepLocator, KeepClient
from .stream import StreamReader
from ._normalize_stream import normalize_stream
from ._ranges import Range, LocatorAndRange
from .safeapi import ThreadSafeApiCache
-import config
-import errors
-import util
-import events
+import arvados.config as config
+import arvados.errors as errors
+import arvados.util
+import arvados.events as events
from arvados.retry import retry_method
_logger = logging.getLogger('arvados.collection')
if fields:
clean_fields = fields[:1] + [
(re.sub(r'\+[^\d][^\+]*', '', x)
- if re.match(util.keep_locator_pattern, x)
+ if re.match(arvados.util.keep_locator_pattern, x)
else x)
for x in fields[1:]]
clean += [' '.join(clean_fields), "\n"]
def _work_trees(self):
path, stream_name, max_manifest_depth = self._queued_trees[0]
- d = util.listdir_recursive(
+ d = arvados.util.listdir_recursive(
path, max_depth = (None if max_manifest_depth == 0 else 0))
if d:
self._queue_dirents(stream_name, d)
self.do_queued_work()
def write(self, newdata):
- if hasattr(newdata, '__iter__'):
+ if isinstance(newdata, bytes):
+ pass
+ elif isinstance(newdata, str):
+ newdata = newdata.encode()
+ elif hasattr(newdata, '__iter__'):
for s in newdata:
self.write(s)
return
return self._last_open
def flush_data(self):
- data_buffer = ''.join(self._data_buffer)
+ data_buffer = b''.join(self._data_buffer)
if data_buffer:
self._current_stream_locators.append(
self._my_keep().put(
sending manifest_text() to the API server's "create
collection" endpoint.
"""
- return self._my_keep().put(self.manifest_text(), copies=self.replication)
+ return self._my_keep().put(self.manifest_text().encode(),
+ copies=self.replication)
def portable_data_hash(self):
- stripped = self.stripped_manifest()
+ stripped = self.stripped_manifest().encode()
return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
def manifest_text(self):
return writer
def check_dependencies(self):
- for path, orig_stat in self._dependencies.items():
+ for path, orig_stat in listitems(self._dependencies):
if not S_ISREG(orig_stat[ST_MODE]):
raise errors.StaleWriterStateError("{} not file".format(path))
try:
:path:
path to a file in the collection
:mode:
- one of "r", "r+", "w", "w+", "a", "a+"
+ a string consisting of "r", "w", or "a", optionally followed
+ by "b" or "t", optionally followed by "+".
+ :"b":
+ binary mode: write() accepts bytes, read() returns bytes.
+ :"t":
+ text mode (default): write() accepts strings, read() returns strings.
:"r":
opens for reading
:"r+":
the end of the file. Writing does not affect the file pointer for
reading.
"""
- mode = mode.replace("b", "")
- if len(mode) == 0 or mode[0] not in ("r", "w", "a"):
- raise errors.ArgumentError("Bad mode '%s'" % mode)
- create = (mode != "r")
- if create and not self.writable():
- raise IOError(errno.EROFS, "Collection is read only")
+ if not re.search(r'^[rwa][bt]?\+?$', mode):
+ raise errors.ArgumentError("Invalid mode {!r}".format(mode))
- if create:
- arvfile = self.find_or_create(path, FILE)
- else:
+ if mode[0] == 'r' and '+' not in mode:
+ fclass = ArvadosFileReader
arvfile = self.find(path)
+ elif not self.writable():
+ raise IOError(errno.EROFS, "Collection is read only")
+ else:
+ fclass = ArvadosFileWriter
+ arvfile = self.find_or_create(path, FILE)
if arvfile is None:
raise IOError(errno.ENOENT, "File not found", path)
if not isinstance(arvfile, ArvadosFile):
raise IOError(errno.EISDIR, "Is a directory", path)
- if mode[0] == "w":
+ if mode[0] == 'w':
arvfile.truncate(0)
- name = os.path.basename(path)
-
- if mode == "r":
- return ArvadosFileReader(arvfile, num_retries=self.num_retries)
- else:
- return ArvadosFileWriter(arvfile, mode, num_retries=self.num_retries)
+ return fclass(arvfile, mode=mode, num_retries=self.num_retries)
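# A minimal usage sketch of the reworked mode handling (the collection
# object and file name here are hypothetical):
#   c = arvados.collection.Collection()
#   writer = c.open('example.txt', 'wb')   # -> ArvadosFileWriter; write() takes bytes
#   writer.write(b'hello\n')
#   writer.close()
#   reader = c.open('example.txt', 'rb')   # -> ArvadosFileReader; read() returns bytes
#   c.open('example.txt', 'x')             # -> errors.ArgumentError: Invalid mode 'x'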
def modified(self):
"""Determine if the collection has been modified since last commited."""
if value == self._committed:
return
if value:
- for k,v in self._items.items():
+ for k,v in listitems(self._items):
v.set_committed(True)
self._committed = True
else:
@synchronized
def __iter__(self):
"""Iterate over names of files and collections contained in this collection."""
- return iter(self._items.keys())
+ return iter(viewkeys(self._items))
@synchronized
def __getitem__(self, k):
@synchronized
def keys(self):
"""Get a list of names of files and collections directly contained in this collection."""
- return self._items.keys()
+ return viewkeys(self._items)
@synchronized
def values(self):
"""Get a list of files and collection objects directly contained in this collection."""
- return self._items.values()
+ return listvalues(self._items)
@synchronized
def items(self):
"""Get a list of (name, object) tuples directly contained in this collection."""
- return self._items.items()
+ return listitems(self._items)
def exists(self, path):
"""Test if there is a file or collection at `path`."""
item.remove(pathcomponents[1])
def _clonefrom(self, source):
- for k,v in source.items():
+ for k,v in listitems(source):
self._items[k] = v.clone(self, k)
def clone(self):
# then return API server's PDH response.
return self._portable_data_hash
else:
- stripped = self.portable_manifest_text()
+ stripped = self.portable_manifest_text().encode()
return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
@synchronized
@synchronized
def flush(self):
"""Flush bufferblocks to Keep."""
- for e in self.values():
+ for e in listvalues(self):
e.flush()
self.events = None
if manifest_locator_or_text:
- if re.match(util.keep_locator_pattern, manifest_locator_or_text):
+ if re.match(arvados.util.keep_locator_pattern, manifest_locator_or_text):
self._manifest_locator = manifest_locator_or_text
- elif re.match(util.collection_uuid_pattern, manifest_locator_or_text):
+ elif re.match(arvados.util.collection_uuid_pattern, manifest_locator_or_text):
self._manifest_locator = manifest_locator_or_text
- elif re.match(util.manifest_pattern, manifest_locator_or_text):
+ elif re.match(arvados.util.manifest_pattern, manifest_locator_or_text):
self._manifest_text = manifest_locator_or_text
else:
raise errors.ArgumentError(
# mode. Return an exception, or None if successful.
try:
self._manifest_text = self._my_keep().get(
- self._manifest_locator, num_retries=self.num_retries)
+ self._manifest_locator, num_retries=self.num_retries).decode()
except Exception as e:
return e
error_via_api = None
error_via_keep = None
should_try_keep = ((self._manifest_text is None) and
- util.keep_locator_pattern.match(
+ arvados.util.keep_locator_pattern.match(
self._manifest_locator))
if ((self._manifest_text is None) and
- util.signed_locator_pattern.match(self._manifest_locator)):
+ arvados.util.signed_locator_pattern.match(self._manifest_locator)):
error_via_keep = self._populate_from_keep()
if self._manifest_text is None:
error_via_api = self._populate_from_api_server()
def _has_collection_uuid(self):
- return self._manifest_locator is not None and re.match(util.collection_uuid_pattern, self._manifest_locator)
+ return self._manifest_locator is not None and re.match(arvados.util.collection_uuid_pattern, self._manifest_locator)
def __enter__(self):
return self
stream_name = tok.replace('\\040', ' ')
blocks = []
segments = []
- streamoffset = 0L
+ streamoffset = 0
state = BLOCKS
self.find_or_create(stream_name, COLLECTION)
continue
if state == BLOCKS:
block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
if block_locator:
- blocksize = long(block_locator.group(1))
+ blocksize = int(block_locator.group(1))
blocks.append(Range(tok, streamoffset, blocksize, 0))
streamoffset += blocksize
else:
if state == SEGMENTS:
file_segment = re.search(r'^(\d+):(\d+):(\S+)', tok)
if file_segment:
- pos = long(file_segment.group(1))
- size = long(file_segment.group(2))
+ pos = int(file_segment.group(1))
+ size = int(file_segment.group(2))
name = file_segment.group(3).replace('\\040', ' ')
filepath = os.path.join(stream_name, name)
afile = self.find_or_create(filepath, FILE)
-#!/usr/bin/env python
-
import argparse
import errno
import os
-#! /usr/bin/env python
-
# arv-copy [--recursive] [--no-recursive] object-uuid src dst
#
# Copies an object from Arvados instance src to instance dst.
# instances src and dst. If either of these files is not found,
# arv-copy will issue an error.
+from __future__ import division
+from future import standard_library
+from future.utils import listvalues
+standard_library.install_aliases()
+from past.builtins import basestring
+from builtins import object
import argparse
import contextlib
import getpass
import sys
import logging
import tempfile
-import urlparse
+import urllib.parse
import arvados
import arvados.config
abort("cannot copy object {} of type {}".format(args.object_uuid, t))
# Clean up any outstanding temp git repositories.
- for d in local_repo_dir.values():
+ for d in listvalues(local_repo_dir):
shutil.rmtree(d, ignore_errors=True)
# If no exception was thrown and the response does not have an
be None if that is not known.
"""
errors = []
- for cname, cspec in template_components.iteritems():
+ for cname, cspec in template_components.items():
def add_error(errmsg):
errors.append("{}: {}".format(cname, errmsg))
if not isinstance(cspec, dict):
# names. The return value is undefined.
#
def copy_git_repos(p, src, dst, dst_repo, args):
- for component in p['components'].itervalues():
+ for component in p['components'].values():
migrate_jobspec(component, src, dst, dst_repo, args)
if 'job' in component:
migrate_jobspec(component['job'], src, dst, dst_repo, args)
git_url = None
for url in priority:
if url.startswith("http"):
- u = urlparse.urlsplit(url)
- baseurl = urlparse.urlunsplit((u.scheme, u.netloc, "", "", ""))
+ u = urllib.parse.urlsplit(url)
+ baseurl = urllib.parse.urlunsplit((u.scheme, u.netloc, "", "", ""))
git_config = ["-c", "credential.%s/.username=none" % baseurl,
"-c", "credential.%s/.helper=!cred(){ cat >/dev/null; if [ \"$1\" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred" % baseurl]
else:
if git_url.startswith("http:"):
if allow_insecure_http:
- logger.warn("Using insecure git url %s but will allow this because %s", git_url, allow_insecure_http_opt)
+ logger.warning("Using insecure git url %s but will allow this because %s", git_url, allow_insecure_http_opt)
else:
raise Exception("Refusing to use insecure git url %s, use %s if you really want this." % (git_url, allow_insecure_http_opt))
runtime_constraints field from src to dst."""
logger.debug('copy_docker_images: {}'.format(pipeline['uuid']))
- for c_name, c_info in pipeline['components'].iteritems():
+ for c_name, c_info in pipeline['components'].items():
if ('runtime_constraints' in c_info and
'docker_image' in c_info['runtime_constraints']):
copy_docker_image(
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
global api_client
-
+
+ if stdout is sys.stdout and hasattr(stdout, 'buffer'):
+ # in Python 3, write to stdout as binary
+ stdout = stdout.buffer
+
args = parse_arguments(arguments, stdout, stderr)
if api_client is None:
api_client = arvados.api('v1')
open_flags |= os.O_EXCL
try:
if args.destination == "-":
- stdout.write(reader.manifest_text(strip=args.strip_manifest))
+ stdout.write(reader.manifest_text(strip=args.strip_manifest).encode())
else:
out_fd = os.open(args.destination, open_flags)
with os.fdopen(out_fd, 'wb') as out_file:
- out_file.write(reader.manifest_text(strip=args.strip_manifest))
+ out_file.write(reader.manifest_text(strip=args.strip_manifest).encode())
except (IOError, OSError) as error:
logger.error("can't write to '{}': {}".format(args.destination, error))
return 1
if args.hash:
digestor = hashlib.new(args.hash)
try:
- with s.open(f.name, 'r') as file_reader:
+ with s.open(f.name, 'rb') as file_reader:
for data in file_reader.readall():
if outfile:
outfile.write(data)
def files_in_collection(c):
# Sort first by file type, then alphabetically by file path.
- for i in sorted(c.keys(),
+ for i in sorted(list(c.keys()),
key=lambda k: (
isinstance(c[k], arvados.collection.Subcollection),
k.upper())):
-#!/usr/bin/env python
-
+from builtins import next
import argparse
import collections
import datetime
cmd = popen_docker(['inspect', '--format={{.Id}}', image_hash],
stdout=subprocess.PIPE)
try:
- image_id = next(cmd.stdout).strip()
+ image_id = next(cmd.stdout).decode().strip()
if image_id.startswith('sha256:'):
return 'v2'
elif ':' not in image_id:
def docker_image_compatible(api, image_hash):
supported = api._rootDesc.get('dockerImageFormats', [])
if not supported:
- logger.warn("server does not specify supported image formats (see docker_image_formats in server config).")
+ logger.warning("server does not specify supported image formats (see docker_image_formats in server config).")
return False
fmt = docker_image_format(image_hash)
# and add image listings for them, retaining the API server preference
# sorting.
images_start_size = len(images)
- for collection_uuid, link in hash_link_map.iteritems():
+ for collection_uuid, link in hash_link_map.items():
if not seen_image_names[collection_uuid]:
images.append(_new_image_listing(link, link['name']))
if len(images) > images_start_size:
if not docker_image_compatible(api, image_hash):
if args.force_image_format:
- logger.warn("forcing incompatible image")
+ logger.warning("forcing incompatible image")
else:
logger.error("refusing to store " \
"incompatible format (use --force-image-format to override)")
-#!/usr/bin/env python
-
from __future__ import print_function
+from __future__ import division
import argparse
import collections
return parser.parse_args(args)
def size_formatter(coll_file):
- return "{:>10}".format((coll_file.size + 1023) / 1024)
+ return "{:>10}".format((coll_file.size + 1023) // 1024)
def name_formatter(coll_file):
return "{}/{}".format(coll_file.stream_name, coll_file.name)
+from __future__ import print_function
+from __future__ import division
import argparse
import time
import sys
if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
need_migrate[pdh] = img
with CollectionReader(i["manifest_text"]) as c:
- if c.values()[0].size() > biggest:
- biggest = c.values()[0].size()
+ size = list(c.values())[0].size()
+ if size > biggest:
+ biggest = size
biggest_pdh = pdh
- totalbytes += c.values()[0].size()
+ totalbytes += size
if args.storage_driver == "vfs":
logger.info("Already migrated %i images", len(already_migrated))
logger.info("Need to migrate %i images", len(need_migrate))
logger.info("Using tempdir %s", tempfile.gettempdir())
- logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest/(2**20))
- logger.info("Total data to migrate about %i MiB", totalbytes/(2**20))
+ logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest>>20)
+ logger.info("Total data to migrate about %i MiB", totalbytes>>20)
df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
ln = df_out.splitlines()[1]
filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
if int(available) <= will_need:
- logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)/(2**20), will_need/(2**20))
+ logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)>>20, will_need>>20)
if not args.force:
exit(1)
else:
success = []
failures = []
count = 1
- for old_image in need_migrate.values():
+ for old_image in list(need_migrate.values()):
if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
continue
oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
- tarfile = oldcol.keys()[0]
+ tarfile = list(oldcol.keys())[0]
logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
- old_image["tag"], old_image["collection"], oldcol.values()[0].size()/(2**20))
+ old_image["tag"], old_image["collection"], list(oldcol.values())[0].size()>>20)
count += 1
start = time.time()
-#!/usr/bin/env python
-
-# TODO:
-# --md5sum - display md5 of each file as read from disk
-
+from __future__ import division
+from future.utils import listitems, listvalues
+from builtins import str
+from builtins import object
import argparse
import arvados
import arvados.collection
if len(args.paths) == 0:
args.paths = ['-']
- args.paths = map(lambda x: "-" if x == "/dev/stdin" else x, args.paths)
+ args.paths = ["-" if x == "/dev/stdin" else x for x in args.paths]
if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
if args.filename:
@classmethod
def make_path(cls, args):
md5 = hashlib.md5()
- md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost'))
+ md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost').encode())
realpaths = sorted(os.path.realpath(path) for path in args.paths)
- md5.update('\0'.join(realpaths))
+ md5.update(b'\0'.join([p.encode() for p in realpaths]))
if any(os.path.isdir(path) for path in realpaths):
- md5.update("-1")
+ md5.update(b'-1')
elif args.filename:
- md5.update(args.filename)
+ md5.update(args.filename.encode())
return os.path.join(
arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'),
md5.hexdigest())
# we have a custom signal handler in place that raises SystemExit with
# the caught signal's code.
if not isinstance(e, SystemExit) or e.code != -2:
- self.logger.warning("Abnormal termination:\n{}".format(traceback.format_exc(e)))
+ self.logger.warning("Abnormal termination:\n{}".format(
+ traceback.format_exc()))
raise
finally:
if not self.dry_run:
Recursively get the total size of the collection
"""
size = 0
- for item in collection.values():
+ for item in listvalues(collection):
if isinstance(item, arvados.collection.Collection) or isinstance(item, arvados.collection.Subcollection):
size += self._collection_size(item)
else:
self.reporter(self.bytes_written, self.bytes_expected)
def _write_stdin(self, filename):
- output = self._local_collection.open(filename, 'w')
+ output = self._local_collection.open(filename, 'wb')
self._write(sys.stdin, output)
output.close()
def _upload_files(self):
for source, resume_offset, filename in self._files_to_upload:
- with open(source, 'r') as source_fd:
+ with open(source, 'rb') as source_fd:
with self._state_lock:
self._state['files'][source]['mtime'] = os.path.getmtime(source)
self._state['files'][source]['size'] = os.path.getsize(source)
if resume_offset > 0:
# Start upload where we left off
- output = self._local_collection.open(filename, 'a')
+ output = self._local_collection.open(filename, 'ab')
source_fd.seek(resume_offset)
else:
# Start from scratch
- output = self._local_collection.open(filename, 'w')
+ output = self._local_collection.open(filename, 'wb')
self._write(source_fd, output)
output.close(flush=False)
if self.use_cache:
# Set up cache file name from input paths.
md5 = hashlib.md5()
- md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost'))
+ md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost').encode())
realpaths = sorted(os.path.realpath(path) for path in self.paths)
- md5.update('\0'.join(realpaths))
+ md5.update(b'\0'.join([p.encode() for p in realpaths]))
if self.filename:
- md5.update(self.filename)
+ md5.update(self.filename.encode())
cache_filename = md5.hexdigest()
cache_filepath = os.path.join(
arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
def collection_file_paths(self, col, path_prefix='.'):
"""Return a list of file paths by recursively go through the entire collection `col`"""
file_paths = []
- for name, item in col.items():
+ for name, item in listitems(col):
if isinstance(item, arvados.arvfile.ArvadosFile):
file_paths.append(os.path.join(path_prefix, name))
elif isinstance(item, arvados.collection.Subcollection):
def portable_data_hash(self):
pdh = self._my_collection().portable_data_hash()
- m = self._my_collection().stripped_manifest()
- local_pdh = hashlib.md5(m).hexdigest() + '+' + str(len(m))
+ m = self._my_collection().stripped_manifest().encode()
+ local_pdh = '{}+{}'.format(hashlib.md5(m).hexdigest(), len(m))
if pdh != local_pdh:
logger.warning("\n".join([
"arv-put: API server provided PDH differs from local manifest.",
locators.append(loc)
return locators
elif isinstance(item, arvados.collection.Collection):
- l = [self._datablocks_on_item(x) for x in item.values()]
+ l = [self._datablocks_on_item(x) for x in listvalues(item)]
# Fast list flattener method taken from:
# http://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python
return [loc for sublist in l for loc in sublist]
if not output.endswith('\n'):
stdout.write('\n')
- for sigcode, orig_handler in orig_signal_handlers.items():
+ for sigcode, orig_handler in listitems(orig_signal_handlers):
signal.signal(sigcode, orig_handler)
if status != 0:
-#!/usr/bin/env python
-
+from __future__ import print_function
+from __future__ import absolute_import
+from builtins import range
+from past.builtins import basestring
+from builtins import object
import arvados
import arvados.commands.ws as ws
import argparse
import re
import os
import stat
-import put
+from . import put
import time
import subprocess
import logging
if files:
uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.retries, project=project)
- for i in xrange(1, len(slots)):
+ for i in range(1, len(slots)):
slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
component = {
group_parser.add_argument('-b', '--batch-size', type=int)
group_parser.add_argument('args', nargs=argparse.REMAINDER)
- for s in xrange(2, len(slots)):
- for i in xrange(0, len(slots[s])):
+ for s in range(2, len(slots)):
+ for i in range(0, len(slots[s])):
if slots[s][i] == '--':
inp = "input%i" % (s-2)
groupargs = group_parser.parse_args(slots[2][i+1:])
-#!/usr/bin/env python
-
+from __future__ import print_function
import sys
import logging
import argparse
elif 'status' in ev and ev['status'] == 200:
pass
else:
- print json.dumps(ev)
+ print(json.dumps(ev))
try:
ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval, last_log_id=last_log_id)
+from builtins import object
import json
import os
class ApiError(apiclient_errors.HttpError):
def _get_reason(self):
try:
- return '; '.join(json.loads(self.content)['errors'])
+ return '; '.join(json.loads(self.content.decode('utf-8'))['errors'])
except (KeyError, TypeError, ValueError):
return super(ApiError, self)._get_reason()
self._request_errors = OrderedDict(request_errors)
if self._request_errors:
exc_reports = [self._format_error(*err_pair)
- for err_pair in self._request_errors.iteritems()]
+ for err_pair in self._request_errors.items()]
base_msg = "{}: {}".format(message, "; ".join(exc_reports))
else:
base_msg = message
+from __future__ import absolute_import
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import object
import arvados
-import config
-import errors
-from retry import RetryLoop
+from . import config
+from . import errors
+from .retry import RetryLoop
import logging
import json
-import thread
+import _thread
import threading
import time
import os
self.on_event_cb(m)
except Exception as e:
_logger.exception("Unexpected exception from event callback.")
- thread.interrupt_main()
+ _thread.interrupt_main()
def on_closed(self):
if not self.is_closed.is_set():
- _logger.warn("Unexpected close. Reconnecting.")
+ _logger.warning("Unexpected close. Reconnecting.")
for tries_left in RetryLoop(num_retries=25, backoff_start=.1, max_wait=15):
try:
self._setup_event_client()
- _logger.warn("Reconnect successful.")
+ _logger.warning("Reconnect successful.")
break
except Exception as e:
- _logger.warn("Error '%s' during websocket reconnect.", e)
+ _logger.warning("Error '%s' during websocket reconnect.", e)
if tries_left == 0:
_logger.exception("EventClient thread could not contact websocket server.")
self.is_closed.set()
- thread.interrupt_main()
+ _thread.interrupt_main()
return
def run_forever(self):
_logger.exception("PollClient thread could not contact API server.")
with self._closing_lock:
self._closing.set()
- thread.interrupt_main()
+ _thread.interrupt_main()
return
for i in items["items"]:
skip_old_events = [["id", ">", str(i["id"])]]
self.on_event(i)
except Exception as e:
_logger.exception("Unexpected exception from event callback.")
- thread.interrupt_main()
+ _thread.interrupt_main()
if items["items_available"] > len(items["items"]):
moreitems = True
if not moreitems:
try:
client = EventClient(uri_with_token, filters, on_event, last_log_id)
except Exception:
- _logger.warn("Failed to connect to websockets on %s" % endpoint)
+ _logger.warning("Failed to connect to websockets on %s" % endpoint)
raise
else:
return client
else:
_logger.info("Using polling because ARVADOS_DISABLE_WEBSOCKETS is true")
except Exception as e:
- _logger.warn("Falling back to polling after websocket error: %s" % e)
+ _logger.warning("Falling back to polling after websocket error: %s" % e)
p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
p.start()
return p
-import cStringIO
+from __future__ import absolute_import
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import next
+from builtins import str
+from builtins import range
+from builtins import object
import collections
import datetime
import hashlib
+import io
import logging
import math
import os
import pycurl
-import Queue
+import queue
import re
import socket
import ssl
import sys
import threading
-import timer
-import urlparse
+from . import timer
+import urllib.parse
+
+if sys.version_info >= (3, 0):
+ from io import BytesIO
+else:
+ from cStringIO import StringIO as BytesIO
import arvados
import arvados.config as config
return getattr(self, data_name)
def setter(self, hex_str):
if not arvados.util.is_hex(hex_str, length):
- raise ValueError("{} is not a {}-digit hex string: {}".
+ raise ValueError("{} is not a {}-digit hex string: {!r}".
format(name, length, hex_str))
setattr(self, data_name, hex_str)
return property(getter, setter)
self._cache = [c for c in self._cache if not (c.ready.is_set() and c.content is None)]
sm = sum([slot.size() for slot in self._cache])
while len(self._cache) > 0 and sm > self.cache_max:
- for i in xrange(len(self._cache)-1, -1, -1):
+ for i in range(len(self._cache)-1, -1, -1):
if self._cache[i].ready.is_set():
del self._cache[i]
break
def _get(self, locator):
# Test if the locator is already in the cache
- for i in xrange(0, len(self._cache)):
+ for i in range(0, len(self._cache)):
if self._cache[i].locator == locator:
n = self._cache[i]
if i != 0:
arvados.errors.HttpError,
)
- def __init__(self, root, user_agent_pool=Queue.LifoQueue(),
+ def __init__(self, root, user_agent_pool=queue.LifoQueue(),
upload_counter=None,
download_counter=None, **headers):
self.root = root
self._result = {'error': None}
self._usable = True
self._session = None
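+ # Socket most recently opened by _socket_open(); get() and put() close it
+ # in their "finally" blocks once each request completes.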
+ self._socket = None
self.get_headers = {'Accept': 'application/octet-stream'}
self.get_headers.update(headers)
self.put_headers = headers
def _get_user_agent(self):
try:
return self._user_agent_pool.get(block=False)
- except Queue.Empty:
+ except queue.Empty:
return pycurl.Curl()
def _put_user_agent(self, ua):
if hasattr(socket, 'TCP_KEEPIDLE'):
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 75)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75)
+ self._socket = s
return s
def get(self, locator, method="GET", timeout=None):
try:
with timer.Timer() as t:
self._headers = {}
- response_body = cStringIO.StringIO()
+ response_body = BytesIO()
curl.setopt(pycurl.NOSIGNAL, 1)
- curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+ curl.setopt(pycurl.OPENSOCKETFUNCTION,
+ lambda *args, **kwargs: self._socket_open(*args, **kwargs))
curl.setopt(pycurl.URL, url.encode('utf-8'))
curl.setopt(pycurl.HTTPHEADER, [
- '{}: {}'.format(k,v) for k,v in self.get_headers.iteritems()])
+ '{}: {}'.format(k,v) for k,v in self.get_headers.items()])
curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
if method == "HEAD":
curl.perform()
except Exception as e:
raise arvados.errors.HttpError(0, str(e))
+ finally:
+ if self._socket:
+ self._socket.close()
+ self._socket = None
self._result = {
'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
'body': response_body.getvalue(),
self._result['status_code'],
len(self._result['body']),
t.msecs,
- (len(self._result['body'])/(1024.0*1024))/t.secs if t.secs > 0 else 0)
+ 1.0*len(self._result['body'])/2**20/t.secs if t.secs > 0 else 0)
if self.download_counter:
self.download_counter.add(len(self._result['body']))
try:
with timer.Timer() as t:
self._headers = {}
- body_reader = cStringIO.StringIO(body)
- response_body = cStringIO.StringIO()
+ body_reader = BytesIO(body)
+ response_body = BytesIO()
curl.setopt(pycurl.NOSIGNAL, 1)
- curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+ curl.setopt(pycurl.OPENSOCKETFUNCTION,
+ lambda *args, **kwargs: self._socket_open(*args, **kwargs))
curl.setopt(pycurl.URL, url.encode('utf-8'))
# Using UPLOAD tells cURL to wait for a "go ahead" from the
# Keep server (in the form of a HTTP/1.1 "100 Continue"
curl.setopt(pycurl.INFILESIZE, len(body))
curl.setopt(pycurl.READFUNCTION, body_reader.read)
curl.setopt(pycurl.HTTPHEADER, [
- '{}: {}'.format(k,v) for k,v in self.put_headers.iteritems()])
+ '{}: {}'.format(k,v) for k,v in self.put_headers.items()])
curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
self._setcurltimeouts(curl, timeout)
curl.perform()
except Exception as e:
raise arvados.errors.HttpError(0, str(e))
+ finally:
+ if self._socket:
+ self._socket.close()
+ self._socket = None
self._result = {
'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
- 'body': response_body.getvalue(),
+ 'body': response_body.getvalue().decode('utf-8'),
'headers': self._headers,
'error': False,
}
self._result['status_code'],
len(body),
t.msecs,
- (len(body)/(1024.0*1024))/t.secs if t.secs > 0 else 0)
+ 1.0*len(body)/2**20/t.secs if t.secs > 0 else 0)
if self.upload_counter:
self.upload_counter.add(len(body))
return True
curl.setopt(pycurl.LOW_SPEED_LIMIT, int(math.ceil(bandwidth_bps)))
def _headerfunction(self, header_line):
- header_line = header_line.decode('iso-8859-1')
+ if isinstance(header_line, bytes):
+ header_line = header_line.decode('iso-8859-1')
if ':' in header_line:
name, value = header_line.split(':', 1)
name = name.strip().lower()
# Returning None implies all bytes were written
- class KeepWriterQueue(Queue.Queue):
+ class KeepWriterQueue(queue.Queue):
def __init__(self, copies):
- Queue.Queue.__init__(self) # Old-style superclass
+ queue.Queue.__init__(self) # Old-style superclass
self.wanted_copies = copies
self.successful_copies = 0
self.response = None
return service, service_root
elif self.empty():
self.pending_tries_notification.notify_all()
- raise Queue.Empty
+ raise queue.Empty
else:
self.pending_tries_notification.wait()
if (not max_service_replicas) or (max_service_replicas >= copies):
num_threads = 1
else:
- num_threads = int(math.ceil(float(copies) / max_service_replicas))
+ num_threads = int(math.ceil(1.0*copies/max_service_replicas))
_logger.debug("Pool max threads is %d", num_threads)
self.workers = []
self.queue = KeepClient.KeepWriterQueue(copies)
while True:
try:
service, service_root = self.queue.get_next_task()
- except Queue.Empty:
+ except queue.Empty:
return
try:
locator, copies = self.do_task(service, service_root)
self.block_cache = block_cache if block_cache else KeepBlockCache()
self.timeout = timeout
self.proxy_timeout = proxy_timeout
- self._user_agent_pool = Queue.LifoQueue()
+ self._user_agent_pool = queue.LifoQueue()
self.upload_counter = Counter()
self.download_counter = Counter()
self.put_counter = Counter()
if not proxy_uris[i].endswith('/'):
proxy_uris[i] += '/'
# URL validation
- url = urlparse.urlparse(proxy_uris[i])
+ url = urllib.parse.urlparse(proxy_uris[i])
if not (url.scheme and url.netloc):
raise arvados.errors.ArgumentError("Invalid proxy URI: {}".format(proxy_uris[i]))
self.api_token = api_token
raise arvados.errors.NoKeepServersError()
# Precompute the base URI for each service.
- for r in self._gateway_services.itervalues():
+ for r in self._gateway_services.values():
host = r['service_host']
if not host.startswith('[') and host.find(':') >= 0:
# IPv6 URIs must be formatted like http://[::1]:80/...
_logger.debug(str(self._gateway_services))
self._keep_services = [
- ks for ks in self._gateway_services.itervalues()
+ ks for ks in self._gateway_services.values()
if not ks.get('service_type', '').startswith('gateway:')]
self._writable_services = [ks for ks in self._keep_services
if not ks.get('read_only')]
The weight is md5(h + u) where u is the last 15 characters of
the service endpoint's UUID.
"""
- return hashlib.md5(data_hash + service_uuid[-15:]).hexdigest()
+ return hashlib.md5((data_hash + service_uuid[-15:]).encode()).hexdigest()
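# Illustrative computation of the weight described above (the data hash and
# service UUID here are made-up values):
#   data_hash    = 'acbd18db4cc2f85cedef654fccc4a4d8'
#   service_uuid = 'zzzzz-bi6l4-012345678901234'
#   weight = hashlib.md5((data_hash + service_uuid[-15:]).encode()).hexdigest()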
def weighted_service_roots(self, locator, force_rebuild=False, need_writable=False):
"""Return an array of Keep service endpoints, in the order in
KeepClient is initialized.
"""
- if isinstance(data, unicode):
- data = data.encode("ascii")
- elif not isinstance(data, str):
- raise arvados.errors.ArgumentError("Argument 'data' to KeepClient.put is not type 'str'")
+ if not isinstance(data, bytes):
+ data = data.encode()
self.put_counter.add(1)
"""
md5 = hashlib.md5(data).hexdigest()
locator = '%s+%d' % (md5, len(data))
- with open(os.path.join(self.local_store, md5 + '.tmp'), 'w') as f:
+ with open(os.path.join(self.local_store, md5 + '.tmp'), 'wb') as f:
f.write(data)
os.rename(os.path.join(self.local_store, md5 + '.tmp'),
os.path.join(self.local_store, md5))
raise arvados.errors.NotFoundError(
"Invalid data locator: '%s'" % loc_s)
if locator.md5sum == config.EMPTY_BLOCK_LOCATOR.split('+')[0]:
- return ''
- with open(os.path.join(self.local_store, locator.md5sum), 'r') as f:
+ return b''
+ with open(os.path.join(self.local_store, locator.md5sum), 'rb') as f:
return f.read()
def is_cached(self, locator):
-#!/usr/bin/env python
-
+from builtins import range
+from builtins import object
import functools
import inspect
import pycurl
import arvados.errors
-_HTTP_SUCCESSES = set(xrange(200, 300))
+_HTTP_SUCCESSES = set(range(200, 300))
_HTTP_CAN_RETRY = set([408, 409, 422, 423, 500, 502, 503, 504])
class RetryLoop(object):
def running(self):
return self._running and (self._success is None)
- def next(self):
+ def __next__(self):
if self._running is None:
self._running = True
if (self.tries_left < 1) or not self.running():
-import threading
-import api
-import keep
-import config
+from __future__ import absolute_import
+
+from builtins import object
import copy
+import threading
+
+import arvados
+import arvados.keep as keep
+import arvados.config as config
class ThreadSafeApiCache(object):
"""Threadsafe wrapper for API objects.
def localapi(self):
if 'api' not in self.local.__dict__:
- self.local.api = api.api_from_config('v1', apiconfig=self.apiconfig)
+ self.local.api = arvados.api_from_config('v1', apiconfig=self.apiconfig)
return self.local.api
def __getattr__(self, name):
+from __future__ import print_function
+from __future__ import absolute_import
+from future.utils import listvalues
+from builtins import object
import collections
import hashlib
import os
from ._ranges import locators_and_ranges, Range
from .arvfile import StreamFileReader
from arvados.retry import retry_method
-from keep import *
-import config
-import errors
-from _normalize_stream import normalize_stream
+from arvados.keep import *
+from . import config
+from . import errors
+from ._normalize_stream import normalize_stream
class StreamReader(object):
def __init__(self, tokens, keep=None, debug=False, _empty=False,
self._keep = keep
self.num_retries = num_retries
- streamoffset = 0L
+ streamoffset = 0
# parse stream
for tok in tokens:
- if debug: print 'tok', tok
+ if debug: print('tok', tok)
if self._stream_name is None:
self._stream_name = tok.replace('\\040', ' ')
continue
s = re.match(r'^[0-9a-f]{32}\+(\d+)(\+\S+)*$', tok)
if s:
- blocksize = long(s.group(1))
+ blocksize = int(s.group(1))
self._data_locators.append(Range(tok, streamoffset, blocksize, 0))
streamoffset += blocksize
continue
s = re.search(r'^(\d+):(\d+):(\S+)', tok)
if s:
- pos = long(s.group(1))
- size = long(s.group(2))
+ pos = int(s.group(1))
+ size = int(s.group(2))
name = s.group(3).replace('\\040', ' ')
if name not in self._files:
self._files[name] = StreamFileReader(self, [Range(pos, 0, size, 0)], name)
return self._files
def all_files(self):
- return self._files.values()
+ return listvalues(self._files)
def size(self):
n = self._data_locators[-1]
def readfrom(self, start, size, num_retries=None):
"""Read up to 'size' bytes from the stream, starting at 'start'"""
if size == 0:
- return ''
+ return b''
if self._keep is None:
self._keep = KeepClient(num_retries=self.num_retries)
data = []
for lr in locators_and_ranges(self._data_locators, start, size):
data.append(self._keepget(lr.locator, num_retries=num_retries)[lr.segment_offset:lr.segment_offset+lr.segment_size])
- return ''.join(data)
+ return b''.join(data)
def manifest_text(self, strip=False):
manifest_text = [self.name().replace(' ', '\\040')]
manifest_text.extend([d.locator for d in self._data_locators])
manifest_text.extend([' '.join(["{}:{}:{}".format(seg.locator, seg.range_size, f.name.replace(' ', '\\040'))
for seg in f.segments])
- for f in self._files.values()])
+ for f in listvalues(self._files)])
return ' '.join(manifest_text) + '\n'
+from __future__ import print_function
+from builtins import object
import time
class Timer(object):
self.secs = self.end - self.start
self.msecs = self.secs * 1000 # millisecs
if self.verbose:
- print 'elapsed time: %f ms' % self.msecs
+ print('elapsed time: %f ms' % self.msecs)
raise arvados.errors.CommandFailedError(
"tar exited %d" % p.returncode)
os.symlink(tarball, os.path.join(path, '.locator'))
- tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
+ tld_extracts = [f for f in os.listdir(path) if f != '.locator']
lockfile.close()
if len(tld_extracts) == 1:
return os.path.join(path, tld_extracts[0])
"unzip exited %d" % p.returncode)
os.unlink(zip_filename)
os.symlink(zipball, os.path.join(path, '.locator'))
- tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
+ tld_extracts = [f for f in os.listdir(path) if f != '.locator']
lockfile.close()
if len(tld_extracts) == 1:
return os.path.join(path, tld_extracts[0])
def list_all(fn, num_retries=0, **kwargs):
# Default limit to (effectively) api server's MAX_LIMIT
- kwargs.setdefault('limit', sys.maxint)
+ kwargs.setdefault('limit', sys.maxsize)
items = []
offset = 0
- items_available = sys.maxint
+ items_available = sys.maxsize
while len(items) < items_available:
c = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
items += c['items']
from arvados.commands.get import main
-sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
+sys.exit(main(sys.argv[1:]))
('share/doc/arvados-python-client', ['LICENSE-2.0.txt', 'README.rst']),
],
install_requires=[
- 'google-api-python-client==1.6.2, <1.7',
'ciso8601',
- 'httplib2 >= 0.9.2',
+ 'future',
+ 'google-api-python-client >=1.6.2, <1.7',
+ 'httplib2 >=0.9.2',
'pycurl >=7.19.5.1',
+ 'ruamel.yaml >=0.13.7',
'setuptools',
- 'ws4py<0.4',
- 'ruamel.yaml>=0.13.7'
+ 'ws4py <0.4',
],
test_suite='tests',
tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
-#!/usr/bin/env python
-
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import range
+from builtins import object
import arvados
import contextlib
import errno
import hashlib
-import httplib
+import http.client
import httplib2
import io
import mock
import os
import pycurl
-import Queue
+import queue
import shutil
import sys
import tempfile
import unittest
+if sys.version_info >= (3, 0):
+ from io import StringIO, BytesIO
+else:
+ from cStringIO import StringIO
+ BytesIO = StringIO
+
# Use this hostname when you want to make sure the traffic will be
# instantly refused. 100::/64 is a dedicated black hole.
TEST_HOST = '100::'
given, it will be consumed to fill the queue before queue_with()
returns.
"""
- queue = Queue.Queue()
+ q = queue.Queue()
for val in items:
- queue.put(val)
- return lambda *args, **kwargs: queue.get(block=False)
+ q.put(val)
+ return lambda *args, **kwargs: q.get(block=False)
# fake_httplib2_response and mock_responses
# mock calls to httplib2.Http.request()
def fake_httplib2_response(code, **headers):
headers.update(status=str(code),
- reason=httplib.responses.get(code, "Unknown Response"))
+ reason=http.client.responses.get(code, "Unknown Response"))
return httplib2.Response(headers)
def mock_responses(body, *codes, **headers):
+ if not isinstance(body, bytes) and hasattr(body, 'encode'):
+ body = body.encode()
return mock.patch('httplib2.Http.request', side_effect=queue_with((
(fake_httplib2_response(code, **headers), body) for code in codes)))
def mock_api_responses(api_client, body, codes, headers={}):
+ if not isinstance(body, bytes) and hasattr(body, 'encode'):
+ body = body.encode()
return mock.patch.object(api_client._http, 'request', side_effect=queue_with((
(fake_httplib2_response(code, **headers), body) for code in codes)))
def str_keep_locator(s):
- return '{}+{}'.format(hashlib.md5(s).hexdigest(), len(s))
+ return '{}+{}'.format(hashlib.md5(s if isinstance(s, bytes) else s.encode()).hexdigest(), len(s))
@contextlib.contextmanager
def redirected_streams(stdout=None, stderr=None):
+ if stdout == StringIO:
+ stdout = StringIO()
+ if stderr == StringIO:
+ stderr = StringIO()
orig_stdout, sys.stdout = sys.stdout, stdout or sys.stdout
orig_stderr, sys.stderr = sys.stderr, stderr or sys.stderr
try:
- yield
+ yield (stdout, stderr)
finally:
sys.stdout = orig_stdout
sys.stderr = orig_stderr
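
# Illustrative sketch, not part of the patch: typical use of the updated
# context manager. Passing the StringIO *class* (rather than an instance)
# asks redirected_streams to build and yield fresh buffers for the caller.
with redirected_streams(stdout=StringIO, stderr=StringIO) as (out, err):
    sys.stdout.write('captured\n')
assert out.getvalue() == 'captured\n'
assert err.getvalue() == ''
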
-class FakeCurl:
+class VersionChecker(object):
+ def assertVersionOutput(self, out, err):
+ if sys.version_info >= (3, 0):
+ self.assertEqual(err.getvalue(), '')
+ v = out.getvalue()
+ else:
+ # Python 2 writes version info on stderr.
+ self.assertEqual(out.getvalue(), '')
+ v = err.getvalue()
+ self.assertRegex(v, r"[0-9]+\.[0-9]+\.[0-9]+$\n")
+
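# Illustrative sketch, not part of the patch: a CLI test case mixes this class
# in and lets assertVersionOutput pick the right stream, so the same assertion
# passes on Python 2 (argparse prints --version to stderr) and Python 3
# (printed to stdout). The class name and version string here are made up.
class _ExampleVersionTest(unittest.TestCase, VersionChecker):
    def test_version_output(self):
        with redirected_streams(stdout=StringIO, stderr=StringIO) as (out, err):
            # Stand-in for a real "--version" invocation.
            stream = sys.stdout if sys.version_info >= (3, 0) else sys.stderr
            stream.write('1.2.3\n')
        self.assertVersionOutput(out, err)
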
+
+class FakeCurl(object):
@classmethod
- def make(cls, code, body='', headers={}):
+ def make(cls, code, body=b'', headers={}):
+ if not isinstance(body, bytes) and hasattr(body, 'encode'):
+ body = body.encode()
return mock.Mock(spec=cls, wraps=cls(code, body, headers))
- def __init__(self, code=200, body='', headers={}):
+ def __init__(self, code=200, body=b'', headers={}):
self._opt = {}
self._got_url = None
self._writer = None
raise ValueError
if self._headerfunction:
self._headerfunction("HTTP/1.1 {} Status".format(self._resp_code))
- for k, v in self._resp_headers.iteritems():
+ for k, v in self._resp_headers.items():
self._headerfunction(k + ': ' + str(v))
if type(self._resp_body) is not bool:
self._writer(self._resp_body)
class MockStreamReader(object):
def __init__(self, name='.', *data):
self._name = name
- self._data = ''.join(data)
+ self._data = b''.join([
+ b if isinstance(b, bytes) else b.encode()
+ for b in data])
self._data_locators = [str_keep_locator(d) for d in data]
self.num_retries = 0
mock_method.return_value = body
else:
mock_method.side_effect = arvados.errors.ApiError(
- fake_httplib2_response(code), "{}")
+ fake_httplib2_response(code), b"{}")
class ArvadosBaseTestCase(unittest.TestCase):
tmpfile.write(leaf)
return tree_root
- def make_test_file(self, text="test"):
+ def make_test_file(self, text=b"test"):
testfile = tempfile.NamedTemporaryFile()
testfile.write(text)
testfile.flush()
return testfile
+
+if sys.version_info < (3, 0):
+ # There is no assert[Not]Regex that works in both Python 2 and 3,
+ # so we backport Python 3 style to Python 2.
+ def assertRegex(self, *args, **kwargs):
+ return self.assertRegexpMatches(*args, **kwargs)
+ def assertNotRegex(self, *args, **kwargs):
+ return self.assertNotRegexpMatches(*args, **kwargs)
+ unittest.TestCase.assertRegex = assertRegex
+ unittest.TestCase.assertNotRegex = assertNotRegex
-import BaseHTTPServer
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+import http.server
import hashlib
import os
import re
-import SocketServer
+import socket
+import socketserver
+import sys
+import threading
import time
-class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, object):
+from . import arvados_testutil as tutil
+
+_debug = os.environ.get('ARVADOS_DEBUG', None)
+
+
+class StubKeepServers(tutil.ApiClientMock):
+
+ def setUp(self):
+ super(StubKeepServers, self).setUp()
+ sock = socket.socket()
+ sock.bind(('0.0.0.0', 0))
+ self.port = sock.getsockname()[1]
+ sock.close()
+ self.server = Server(('0.0.0.0', self.port), Handler)
+ self.thread = threading.Thread(target=self.server.serve_forever)
+ self.thread.daemon = True # Exit thread if main proc exits
+ self.thread.start()
+ self.api_client = self.mock_keep_services(
+ count=1,
+ service_host='localhost',
+ service_port=self.port,
+ )
+
+ def tearDown(self):
+ self.server.shutdown()
+ super(StubKeepServers, self).tearDown()
+
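# Illustrative sketch, not part of the patch: a test that needs a throttled or
# flaky Keep server can inherit from StubKeepServers; setUp() has already
# started a Handler-backed server on a free local port and pointed the mocked
# API client's Keep service list at it. Roughly (names below are made up):
#
#   class ExampleKeepStubTest(StubKeepServers, unittest.TestCase):
#       def test_slow_server(self):
#           self.server.setdelays(response=0.5)  # stall before each reply
#           self.server.setbandwidth(1024)       # throttle reads/writes
#           # ...exercise KeepClient timeout/retry behaviour against
#           # localhost:self.port here...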
+
+class Server(socketserver.ThreadingMixIn, http.server.HTTPServer, object):
allow_reuse_address = 1
def setdelays(self, **kwargs):
"""In future requests, induce delays at the given checkpoints."""
- for (k, v) in kwargs.iteritems():
- self.delays.get(k) # NameError if unknown key
+ for (k, v) in kwargs.items():
+            self.delays[k]  # KeyError if unknown key
self.delays[k] = v
def setbandwidth(self, bandwidth):
self._sleep_at_least(self.delays[k])
-class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
+class Handler(http.server.BaseHTTPRequestHandler, object):
+
+ protocol_version = 'HTTP/1.1'
+
def wfile_bandwidth_write(self, data_to_write):
- if self.server.bandwidth == None and self.server.delays['mid_write'] == 0:
+ if self.server.bandwidth is None and self.server.delays['mid_write'] == 0:
self.wfile.write(data_to_write)
else:
- BYTES_PER_WRITE = int(self.server.bandwidth/4.0) or 32768
+ BYTES_PER_WRITE = int(self.server.bandwidth/4) or 32768
outage_happened = False
num_bytes = len(data_to_write)
num_sent_bytes = 0
while num_sent_bytes < num_bytes:
if num_sent_bytes > self.server.bandwidth and not outage_happened:
self.server._do_delay('mid_write')
- target_time += self.delays['mid_write']
+ target_time += self.server.delays['mid_write']
outage_happened = True
num_write_bytes = min(BYTES_PER_WRITE,
num_bytes - num_sent_bytes)
return None
def rfile_bandwidth_read(self, bytes_to_read):
- if self.server.bandwidth == None and self.server.delays['mid_read'] == 0:
+ if self.server.bandwidth is None and self.server.delays['mid_read'] == 0:
return self.rfile.read(bytes_to_read)
else:
- BYTES_PER_READ = int(self.server.bandwidth/4.0) or 32768
- data = ''
+ BYTES_PER_READ = int(self.server.bandwidth/4) or 32768
+ data = b''
outage_happened = False
bytes_read = 0
target_time = time.time()
while bytes_to_read > bytes_read:
if bytes_read > self.server.bandwidth and not outage_happened:
self.server._do_delay('mid_read')
- target_time += self.delays['mid_read']
+ target_time += self.server.delays['mid_read']
outage_happened = True
next_bytes_to_read = min(BYTES_PER_READ,
bytes_to_read - bytes_read)
self.server._sleep_at_least(target_time - time.time())
return data
+ def finish(self, *args, **kwargs):
+ try:
+ return super(Handler, self).finish(*args, **kwargs)
+ except Exception as err:
+ if _debug:
+ raise
+
def handle(self, *args, **kwargs):
+ try:
+ return super(Handler, self).handle(*args, **kwargs)
+ except:
+ if _debug:
+ raise
+
+ def handle_one_request(self, *args, **kwargs):
+ self._sent_continue = False
self.server._do_delay('request')
- return super(Handler, self).handle(*args, **kwargs)
+ return super(Handler, self).handle_one_request(*args, **kwargs)
+
+ def handle_expect_100(self):
+ self.server._do_delay('request_body')
+ self._sent_continue = True
+ return super(Handler, self).handle_expect_100()
def do_GET(self):
self.server._do_delay('response')
if datahash not in self.server.store:
return self.send_response(404)
self.send_response(200)
+ self.send_header('Connection', 'close')
self.send_header('Content-type', 'application/octet-stream')
self.end_headers()
self.server._do_delay('response_body')
if datahash not in self.server.store:
return self.send_response(404)
self.send_response(200)
+ self.send_header('Connection', 'close')
self.send_header('Content-type', 'application/octet-stream')
self.send_header('Content-length', str(len(self.server.store[datahash])))
self.end_headers()
self.server._do_delay('response_close')
+ self.close_connection = True
def do_PUT(self):
- self.server._do_delay('request_body')
- # The comments at https://bugs.python.org/issue1491 implies that Python
- # 2.7 BaseHTTPRequestHandler was patched to support 100 Continue, but
- # reading the actual code that ships in Debian it clearly is not, so we
- # need to send the response on the socket directly.
- self.wfile_bandwidth_write("%s %d %s\r\n\r\n" %
- (self.protocol_version, 100, "Continue"))
- data = self.rfile_bandwidth_read(int(self.headers.getheader('content-length')))
+ if not self._sent_continue and self.headers.get('expect') == '100-continue':
+            # The comments at https://bugs.python.org/issue1491 imply
+            # that Python 2.7 BaseHTTPRequestHandler was patched to
+            # support 100 Continue, but the code that actually ships
+            # in Debian clearly was not, so we need to send the
+            # response on the socket directly.
+ self.server._do_delay('request_body')
+ self.wfile.write("{} {} {}\r\n\r\n".format(
+ self.protocol_version, 100, "Continue").encode())
+ data = self.rfile_bandwidth_read(
+ int(self.headers.get('content-length')))
datahash = hashlib.md5(data).hexdigest()
self.server.store[datahash] = data
+ resp = '{}+{}\n'.format(datahash, len(data)).encode()
self.server._do_delay('response')
self.send_response(200)
+ self.send_header('Connection', 'close')
self.send_header('Content-type', 'text/plain')
+ self.send_header('Content-length', len(resp))
self.end_headers()
self.server._do_delay('response_body')
- self.wfile_bandwidth_write(datahash + '+' + str(len(data)))
+ self.wfile_bandwidth_write(resp)
self.server._do_delay('response_close')
+ self.close_connection = True
def log_request(self, *args, **kwargs):
- if os.environ.get('ARVADOS_DEBUG', None):
+ if _debug:
super(Handler, self).log_request(*args, **kwargs)
-
- def finish(self, *args, **kwargs):
- """Ignore exceptions, notably "Broken pipe" when client times out."""
- try:
- return super(Handler, self).finish(*args, **kwargs)
- except:
- pass
-
- def handle_one_request(self, *args, **kwargs):
- """Ignore exceptions, notably "Broken pipe" when client times out."""
- try:
- return super(Handler, self).handle_one_request(*args, **kwargs)
- except:
- pass
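
# Illustrative sketch, not part of the patch: the do_PUT handler above replies
# with a bare Keep locator -- md5 hex digest of the body, '+', byte count --
# followed by a newline. For example:
import hashlib
data = b'foo'
resp = '{}+{}\n'.format(hashlib.md5(data).hexdigest(), len(data)).encode()
assert resp == b'acbd18db4cc2f85cedef654fccc4a4d8+3\n'
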
+from __future__ import absolute_import
+from builtins import range
+from builtins import object
import arvados
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
class ManifestExamples(object):
def make_manifest(self,
pr = profile.Profile()
pr.enable()
try:
- ret = function(*args, **kwargs)
- except Exception as e:
- caught = e
- pr.disable()
- ps = pstats.Stats(pr, stream=outfile)
- ps.sort_stats('time').print_stats()
- if caught:
- raise
- return ret
+ return function(*args, **kwargs)
+ finally:
+ pr.disable()
+ ps = pstats.Stats(pr, stream=outfile)
+ ps.sort_stats('time').print_stats()
return profiled_function
+from __future__ import print_function
+from __future__ import absolute_import
+from builtins import range
import unittest
-from performance_profiler import profiled
+from .performance_profiler import profiled
class PerformanceTestSample(unittest.TestCase):
def foo(self):
for i in range(0,2**20):
j += i
self.foo()
- print 'Hello'
+ print('Hello')
-#!/usr/bin/env python
-
from __future__ import print_function
+from __future__ import division
+from builtins import str
+from builtins import range
import argparse
import atexit
import errno
# Use up to half of the +wait+ period waiting for "passenger
# stop" to work. If the process hasn't exited by then, start
# sending TERM signals.
- startTERM += wait/2
+ startTERM += wait//2
server_pid = None
while now <= deadline and server_pid is None:
except OSError as error:
if error.errno != errno.ENOENT:
raise
- os.mkfifo(fifo, 0700)
+ os.mkfifo(fifo, 0o700)
subprocess.Popen(
['stdbuf', '-i0', '-oL', '-eL', 'sed', '-e', 's/^/['+label+'] /', fifo],
stdout=sys.stderr)
"-listen=:{}".format(port),
"-pid="+_pidfile('keep{}'.format(n))]
- for arg, val in keep_args.iteritems():
+ for arg, val in keep_args.items():
keep_cmd.append("{}={}".format(arg, val))
logf = open(_fifo2stderr('keep{}'.format(n)), 'w')
@staticmethod
def _restore_dict(src, dest):
- for key in dest.keys():
+ for key in list(dest.keys()):
if key not in src:
del dest[key]
dest.update(src)
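
    # Illustrative note, not part of the patch: list(dest.keys()) above matters
    # on Python 3 because deleting entries while iterating a live dict view
    # raises "RuntimeError: dictionary changed size during iteration", whereas
    # iterating a snapshot list, e.g.
    #     for key in list(d.keys()): del d[key]
    # works on both Python 2 and 3.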
-#!/usr/bin/env python
-
+from __future__ import absolute_import
+from builtins import str
+from builtins import range
import arvados
import collections
import httplib2
import unittest
import mock
-import run_test_server
+from . import run_test_server
from apiclient import errors as apiclient_errors
from apiclient import http as apiclient_http
from arvados.api import OrderedJsonModel, RETRY_DELAY_INITIAL, RETRY_DELAY_BACKOFF, RETRY_COUNT
-from arvados_testutil import fake_httplib2_response, queue_with
+from .arvados_testutil import fake_httplib2_response, queue_with
if not mimetypes.inited:
mimetypes.init()
def api_error_response(self, code, *errors):
return (fake_httplib2_response(code, **self.ERROR_HEADERS),
json.dumps({'errors': errors,
- 'error_token': '1234567890+12345678'}))
+ 'error_token': '1234567890+12345678'}).encode())
def test_new_api_objects_with_cache(self):
clients = [arvados.api('v1', cache=True) for index in [0, 1]]
mock_responses = {
'arvados.humans.delete': (
fake_httplib2_response(500, **self.ERROR_HEADERS),
- "")
+ b"")
}
req_builder = apiclient_http.RequestMockBuilder(mock_responses)
api = arvados.api('v1', requestBuilder=req_builder)
def test_ordered_json_model(self):
mock_responses = {
- 'arvados.humans.get': (None, json.dumps(collections.OrderedDict(
- (c, int(c, 16)) for c in string.hexdigits))),
- }
+ 'arvados.humans.get': (
+ None,
+ json.dumps(collections.OrderedDict(
+ (c, int(c, 16)) for c in string.hexdigits
+ )).encode(),
+ ),
+ }
req_builder = apiclient_http.RequestMockBuilder(mock_responses)
api = arvados.api('v1',
requestBuilder=req_builder, model=OrderedJsonModel())
result = api.humans().get(uuid='test').execute()
- self.assertEqual(string.hexdigits, ''.join(result.keys()))
+ self.assertEqual(string.hexdigits, ''.join(list(result.keys())))
class RetryREST(unittest.TestCase):
mock_conns = {str(i): mock.MagicMock() for i in range(2)}
self.api._http.connections = mock_conns.copy()
self.api.users().create(body={}).execute()
- for c in mock_conns.itervalues():
+ for c in mock_conns.values():
self.assertEqual(c.close.call_count, expect)
@mock.patch('time.sleep')
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import io
+from __future__ import absolute_import
import os
import sys
import tempfile
import unittest
import arvados.commands.arv_copy as arv_copy
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
-class ArvCopyTestCase(unittest.TestCase):
+class ArvCopyTestCase(unittest.TestCase, tutil.VersionChecker):
def run_copy(self, args):
sys.argv = ['arv-copy'] + args
return arv_copy.main()
self.run_copy(['-x=unknown'])
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with tutil.redirected_streams(stdout=out, stderr=err):
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.run_copy(['--version'])
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
+from __future__ import absolute_import
+from future.utils import listitems
import io
import os
import re
import arvados
import arvados.collection as collection
import arvados.commands.get as arv_get
-import run_test_server
+from . import run_test_server
-from arvados_testutil import redirected_streams
+from . import arvados_testutil as tutil
-class ArvadosGetTestCase(run_test_server.TestCaseWithServers):
+class ArvadosGetTestCase(run_test_server.TestCaseWithServers,
+ tutil.VersionChecker):
MAIN_SERVER = {}
KEEP_SERVER = {}
'subdir/baz.txt' : 'baz',
}):
c = collection.Collection()
- for path, data in contents.items():
- with c.open(path, 'w') as f:
+ for path, data in listitems(contents):
+ with c.open(path, 'wb') as f:
f.write(data)
c.save_new()
+
return (c.manifest_locator(),
c.portable_data_hash(),
c.manifest_text(strip=strip_manifest))
-
+
def run_get(self, args):
- self.stdout = io.BytesIO()
- self.stderr = io.BytesIO()
+ self.stdout = tutil.BytesIO()
+ self.stderr = tutil.StringIO()
return arv_get.main(args, self.stdout, self.stderr)
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with redirected_streams(stdout=out, stderr=err):
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.run_get(['--version'])
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
def test_get_single_file(self):
# Get the file using the collection's locator
r = self.run_get(["{}/subdir/baz.txt".format(self.col_loc), '-'])
self.assertEqual(0, r)
- self.assertEqual('baz', self.stdout.getvalue())
+ self.assertEqual(b'baz', self.stdout.getvalue())
# Then, try by PDH
r = self.run_get(["{}/subdir/baz.txt".format(self.col_pdh), '-'])
self.assertEqual(0, r)
- self.assertEqual('baz', self.stdout.getvalue())
+ self.assertEqual(b'baz', self.stdout.getvalue())
def test_get_multiple_files(self):
# Download the entire collection to the temp directory
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
+from __future__ import absolute_import
import arvados
import hashlib
-import io
import mock
import os
import subprocess
import logging
import arvados.commands.keepdocker as arv_keepdocker
-import arvados_testutil as tutil
-import run_test_server
+from . import arvados_testutil as tutil
+from . import run_test_server
class StopTest(Exception):
pass
-class ArvKeepdockerTestCase(unittest.TestCase):
+class ArvKeepdockerTestCase(unittest.TestCase, tutil.VersionChecker):
def run_arv_keepdocker(self, args, err):
sys.argv = ['arv-keepdocker'] + args
log_handler = logging.StreamHandler(err)
self.run_arv_keepdocker(['-x=unknown'], sys.stderr)
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with tutil.redirected_streams(stdout=out, stderr=err):
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.run_arv_keepdocker(['--version'], sys.stderr)
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
@mock.patch('arvados.commands.keepdocker.find_image_hashes',
return_value=['abc123'])
@mock.patch('arvados.commands.keepdocker.find_one_image_hash',
return_value='abc123')
def test_image_format_compatibility(self, _1, _2):
- old_id = hashlib.sha256('old').hexdigest()
- new_id = 'sha256:'+hashlib.sha256('new').hexdigest()
+ old_id = hashlib.sha256(b'old').hexdigest()
+ new_id = 'sha256:'+hashlib.sha256(b'new').hexdigest()
for supported, img_id, expect_ok in [
(['v1'], old_id, True),
(['v1'], new_id, False),
else:
fakeDD['dockerImageFormats'] = supported
- err = io.BytesIO()
- out = io.BytesIO()
+ err = tutil.StringIO()
+ out = tutil.StringIO()
with tutil.redirected_streams(stdout=out), \
mock.patch('arvados.api') as api, \
self.assertEqual(out.getvalue(), '')
if expect_ok:
- self.assertNotRegexpMatches(
+ self.assertNotRegex(
err.getvalue(), "refusing to store",
msg=repr((supported, img_id)))
else:
- self.assertRegexpMatches(
+ self.assertRegex(
err.getvalue(), "refusing to store",
msg=repr((supported, img_id)))
if not supported:
- self.assertRegexpMatches(
+ self.assertRegex(
err.getvalue(),
"server does not specify supported image formats",
msg=repr((supported, img_id)))
fakeDD = arvados.api('v1')._rootDesc
fakeDD['dockerImageFormats'] = ['v1']
- err = io.BytesIO()
- out = io.BytesIO()
+ err = tutil.StringIO()
+ out = tutil.StringIO()
with tutil.redirected_streams(stdout=out), \
mock.patch('arvados.api') as api, \
mock.patch('arvados.commands.keepdocker.popen_docker',
api()._rootDesc = fakeDD
self.run_arv_keepdocker(
['--force', '--force-image-format', 'testimage'], err)
- self.assertRegexpMatches(err.getvalue(), "forcing incompatible image")
+ self.assertRegex(err.getvalue(), "forcing incompatible image")
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import io
+from __future__ import absolute_import
+from builtins import str
+from builtins import range
import os
import random
import sys
import arvados.errors as arv_error
import arvados.commands.ls as arv_ls
-import run_test_server
+from . import run_test_server
-from arvados_testutil import str_keep_locator, redirected_streams
+from . import arvados_testutil as tutil
+from .arvados_testutil import str_keep_locator, redirected_streams, StringIO
-class ArvLsTestCase(run_test_server.TestCaseWithServers):
+class ArvLsTestCase(run_test_server.TestCaseWithServers, tutil.VersionChecker):
FAKE_UUID = 'zzzzz-4zz18-12345abcde12345'
def newline_join(self, seq):
return coll_info, api_client
def run_ls(self, args, api_client, logger=None):
- self.stdout = io.BytesIO()
- self.stderr = io.BytesIO()
+ self.stdout = StringIO()
+ self.stderr = StringIO()
return arv_ls.main(args, self.stdout, self.stderr, api_client, logger)
def test_plain_listing(self):
self.assertEqual(1, error_mock.call_count)
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with redirected_streams(stdout=out, stderr=err):
+ with redirected_streams(stdout=StringIO, stderr=StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.run_ls(['--version'], None)
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
import subprocess
import sys
import tempfile
import unittest
+from . import arvados_testutil as tutil
+
-class ArvNormalizeTestCase(unittest.TestCase):
+class ArvNormalizeTestCase(unittest.TestCase, tutil.VersionChecker):
def run_arv_normalize(self, args=[]):
p = subprocess.Popen([sys.executable, 'bin/arv-normalize'] + args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
- (stdout, stderr) = p.communicate()
- return p.returncode, stdout, stderr
+ out, err = p.communicate()
+ sys.stdout.write(out.decode())
+ sys.stderr.write(err.decode())
+ return p.returncode
def test_unsupported_arg(self):
- returncode, out, err = self.run_arv_normalize(['-x=unknown'])
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
+ returncode = self.run_arv_normalize(['-x=unknown'])
self.assertNotEqual(0, returncode)
def test_version_argument(self):
- returncode, out, err = self.run_arv_normalize(['--version'])
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
+ returncode = self.run_arv_normalize(['--version'])
+ self.assertVersionOutput(out, err)
self.assertEqual(0, returncode)
- self.assertEqual('', out)
- self.assertNotEqual('', err)
- self.assertRegexpMatches(err, "[0-9]+\.[0-9]+\.[0-9]+")
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
+from __future__ import absolute_import
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import range
import apiclient
-import io
import mock
import os
import pwd
import hashlib
import random
-from cStringIO import StringIO
-
import arvados
import arvados.commands.put as arv_put
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
-from arvados_testutil import ArvadosBaseTestCase, fake_httplib2_response
-import run_test_server
+from .arvados_testutil import ArvadosBaseTestCase, fake_httplib2_response
+from . import run_test_server
class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
CACHE_ARGSET = [
_, self.large_file_name = tempfile.mkstemp()
fileobj = open(self.large_file_name, 'w')
# Make sure to write just a little more than one block
- for _ in range((arvados.config.KEEP_BLOCK_SIZE/(1024*1024))+1):
- data = random.choice(['x', 'y', 'z']) * 1024 * 1024 # 1 MB
+ for _ in range((arvados.config.KEEP_BLOCK_SIZE>>20)+1):
+ data = random.choice(['x', 'y', 'z']) * 1024 * 1024 # 1 MiB
fileobj.write(data)
fileobj.close()
# Temp dir containing small files to be repacked
def test_writer_works_with_cache(self):
with tempfile.NamedTemporaryFile() as f:
- f.write('foo')
+ f.write(b'foo')
f.flush()
cwriter = arv_put.ArvPutUploadJob([f.name])
cwriter.start(save_collection=False)
def test_progress_reporting(self):
with tempfile.NamedTemporaryFile() as f:
- f.write('foo')
+ f.write(b'foo')
f.flush()
for expect_count in (None, 8):
progression, reporter = self.make_progress_tester()
def test_known_human_progress(self):
for count, total in [(0, 1), (2, 4), (45, 60)]:
- expect = '{:.1%}'.format(float(count) / total)
+ expect = '{:.1%}'.format(1.0*count/total)
actual = arv_put.human_progress(count, total)
self.assertTrue(actual.startswith('\r'))
self.assertIn(expect, actual)
arv_put.human_progress(count, None)))
-class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
+class ArvadosPutTest(run_test_server.TestCaseWithServers,
+ ArvadosBaseTestCase,
+ tutil.VersionChecker):
MAIN_SERVER = {}
Z_UUID = 'zzzzz-zzzzz-zzzzzzzzzzzzzzz'
def call_main_with_args(self, args):
- self.main_stdout = StringIO()
- self.main_stderr = StringIO()
+ self.main_stdout = tutil.StringIO()
+ self.main_stderr = tutil.StringIO()
return arv_put.main(args, self.main_stdout, self.main_stderr)
def call_main_on_test_file(self, args=[]):
super(ArvadosPutTest, self).tearDown()
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with tutil.redirected_streams(stdout=out, stderr=err):
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.call_main_with_args(['--version'])
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
def test_simple_file_put(self):
self.call_main_on_test_file()
def test_api_error_handling(self):
coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()')
coll_save_mock.side_effect = arvados.errors.ApiError(
- fake_httplib2_response(403), '{}')
+ fake_httplib2_response(403), b'{}')
with mock.patch('arvados.collection.Collection.save_new',
new=coll_save_mock):
with self.assertRaises(SystemExit) as exc_test:
result = arv_put.desired_project_uuid(arv_put.api_client, BAD_UUID,
0)
except ValueError as error:
- self.assertIn(BAD_UUID, error.message)
+ self.assertIn(BAD_UUID, str(error))
else:
self.assertFalse(result, "incorrectly found nonexistent project")
[sys.executable, arv_put.__file__, '--stream'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, env=self.ENVIRON)
- pipe.stdin.write('stdin test\n')
+ pipe.stdin.write(b'stdin test\n')
pipe.stdin.close()
deadline = time.time() + 5
while (pipe.poll() is None) and (time.time() < deadline):
elif returncode != 0:
sys.stdout.write(pipe.stdout.read())
self.fail("arv-put returned exit code {}".format(returncode))
- self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11', pipe.stdout.read())
+ self.assertIn('4a9c8b735dce4b5fa3acf221a0b13628+11',
+ pipe.stdout.read().decode())
def test_ArvPutSignedManifest(self):
# ArvPutSignedManifest runs "arv-put foo" and then attempts to get
with open(os.path.join(datadir, "foo"), "w") as f:
f.write("The quick brown fox jumped over the lazy dog")
p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
- stdout=subprocess.PIPE, env=self.ENVIRON)
- (arvout, arverr) = p.communicate()
- self.assertEqual(arverr, None)
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Collection saved as ')
self.assertEqual(p.returncode, 0)
# The manifest text stored in the API server under the same
# manifest UUID must use signed locators.
c = arv_put.api_client.collections().get(uuid=manifest_uuid).execute()
- self.assertRegexpMatches(
+ self.assertRegex(
c['manifest_text'],
r'^\. 08a008a01d498c404b0c30852b39d3b8\+44\+A[0-9a-f]+@[0-9a-f]+ 0:44:foo\n')
[sys.executable, arv_put.__file__] + extra_args,
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, env=self.ENVIRON)
- stdout, stderr = pipe.communicate(text)
+ stdout, stderr = pipe.communicate(text.encode())
+ self.assertRegex(stderr.decode(), r'INFO: Collection (updated:|saved as)')
search_key = ('portable_data_hash'
if '--portable-data-hash' in extra_args else 'uuid')
collection_list = arvados.api('v1').collections().list(
- filters=[[search_key, '=', stdout.strip()]]).execute().get('items', [])
+ filters=[[search_key, '=', stdout.decode().strip()]]
+ ).execute().get('items', [])
self.assertEqual(1, len(collection_list))
return collection_list[0]
self.assertEqual(col['uuid'], updated_col['uuid'])
# Get the manifest and check that the new file is being included
c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
- self.assertRegexpMatches(c['manifest_text'], r'^\. .*:44:file2\n')
+ self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n')
def test_put_collection_with_high_redundancy(self):
# Write empty data: we're not testing CollectionWriter, just
"Test unnamed collection",
['--portable-data-hash', '--project-uuid', self.PROJECT_UUID])
username = pwd.getpwuid(os.getuid()).pw_name
- self.assertRegexpMatches(
+ self.assertRegex(
link['name'],
r'^Saved at .* by {}@'.format(re.escape(username)))
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import io
+from __future__ import absolute_import
import os
import sys
import tempfile
import unittest
import arvados.commands.run as arv_run
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
-class ArvRunTestCase(unittest.TestCase):
+class ArvRunTestCase(unittest.TestCase, tutil.VersionChecker):
def run_arv_run(self, args):
sys.argv = ['arv-run'] + args
return arv_run.main()
self.run_arv_run(['-x=unknown'])
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with tutil.redirected_streams(stdout=out, stderr=err):
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.run_arv_run(['--version'])
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
-#!/usr/bin/env python
-
-import io
+from __future__ import absolute_import
import os
import sys
import tempfile
import arvados.errors as arv_error
import arvados.commands.ws as arv_ws
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
-class ArvWsTestCase(unittest.TestCase):
+class ArvWsTestCase(unittest.TestCase, tutil.VersionChecker):
def run_ws(self, args):
return arv_ws.main(args)
self.run_ws(['-x=unknown'])
def test_version_argument(self):
- err = io.BytesIO()
- out = io.BytesIO()
- with tutil.redirected_streams(stdout=out, stderr=err):
+ with tutil.redirected_streams(
+ stdout=tutil.StringIO, stderr=tutil.StringIO) as (out, err):
with self.assertRaises(SystemExit):
self.run_ws(['--version'])
- self.assertEqual(out.getvalue(), '')
- self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ self.assertVersionOutput(out, err)
-#!/usr/bin/env python
-
+from __future__ import absolute_import
+from builtins import hex
+from builtins import str
+from builtins import range
+from builtins import object
import bz2
import datetime
import gzip
from arvados.collection import Collection, CollectionReader
from arvados.arvfile import ArvadosFile, ArvadosFileReader
-import arvados_testutil as tutil
-from test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
+from . import arvados_testutil as tutil
+from .test_stream import StreamFileReaderTestCase, StreamRetryTestMixin
class ArvadosFileWriterTestCase(unittest.TestCase):
class MockKeep(object):
return self.blocks.get(locator)
def put(self, data, num_retries=None, copies=None):
pdh = tutil.str_keep_locator(data)
- self.blocks[pdh] = str(data)
+ self.blocks[pdh] = bytes(data)
return pdh
class MockApi(object):
def test_truncate(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
- api = ArvadosFileWriterTestCase.MockApi({"name":"test_truncate",
- "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
- "replication_desired":None},
- {"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
- "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
+ api = ArvadosFileWriterTestCase.MockApi({
+ "name": "test_truncate",
+ "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
+ "replication_desired": None,
+ }, {
+ "uuid": "zzzzz-4zz18-mockcollection0",
+ "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 0:8:count.txt\n",
+ "portable_data_hash":"7fcd0eaac3aad4c31a6a0e756475da92+52",
+ })
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
+ writer = c.open("count.txt", "rb+")
self.assertEqual(writer.size(), 10)
- self.assertEqual("0123456789", writer.read(12))
+ self.assertEqual(b"0123456789", writer.read(12))
writer.truncate(8)
# Make sure reading off the end doesn't break
- self.assertEqual("", writer.read(12))
+ self.assertEqual(b"", writer.read(12))
self.assertEqual(writer.size(), 8)
writer.seek(0, os.SEEK_SET)
- self.assertEqual("01234567", writer.read(12))
+ self.assertEqual(b"01234567", writer.read(12))
self.assertIsNone(c.manifest_locator())
self.assertTrue(c.modified())
def test_write_to_end(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
- api = ArvadosFileWriterTestCase.MockApi({"name":"test_append",
- "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
- "replication_desired":None},
- {"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
- "portable_data_hash":"c5c3af76565c8efb6a806546bcf073f3+88"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
+ api = ArvadosFileWriterTestCase.MockApi({
+ "name": "test_append",
+ "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
+ "replication_desired": None,
+ }, {
+ "uuid": "zzzzz-4zz18-mockcollection0",
+ "manifest_text": ". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:13:count.txt\n",
+ "portable_data_hash": "c5c3af76565c8efb6a806546bcf073f3+88",
+ })
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
+ writer = c.open("count.txt", "rb+")
self.assertEqual(writer.size(), 10)
self.assertEqual(5, writer.seek(5, os.SEEK_SET))
- self.assertEqual("56789", writer.read(8))
+ self.assertEqual(b"56789", writer.read(8))
writer.seek(10, os.SEEK_SET)
writer.write("foo")
self.assertEqual(writer.size(), 13)
writer.seek(5, os.SEEK_SET)
- self.assertEqual("56789foo", writer.read(8))
+ self.assertEqual(b"56789foo", writer.read(8))
self.assertIsNone(c.manifest_locator())
self.assertTrue(c.modified())
c.save_new("test_append")
self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
self.assertFalse(c.modified())
- self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
+ self.assertEqual(b"foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
def test_append(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
- writer = c.open("count.txt", "a+")
- self.assertEqual(writer.read(20), "0123456789")
+ writer = c.open("count.txt", "ab+")
+ self.assertEqual(writer.read(20), b"0123456789")
writer.seek(0, os.SEEK_SET)
writer.write("hello")
- self.assertEqual(writer.read(20), "0123456789hello")
+ self.assertEqual(writer.read(20), b"0123456789hello")
writer.seek(0, os.SEEK_SET)
writer.write("world")
- self.assertEqual(writer.read(20), "0123456789helloworld")
+ self.assertEqual(writer.read(20), b"0123456789helloworld")
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
def test_write_at_beginning(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- self.assertEqual("0123456789", writer.readfrom(0, 13))
+ writer = c.open("count.txt", "rb+")
+ self.assertEqual(b"0123456789", writer.readfrom(0, 13))
writer.seek(0, os.SEEK_SET)
writer.write("foo")
self.assertEqual(writer.size(), 10)
- self.assertEqual("foo3456789", writer.readfrom(0, 13))
+ self.assertEqual(b"foo3456789", writer.readfrom(0, 13))
self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
def test_write_empty(self):
keep = ArvadosFileWriterTestCase.MockKeep({})
with Collection(keep_client=keep) as c:
- writer = c.open("count.txt", "w")
+ writer = c.open("count.txt", "wb")
self.assertEqual(writer.size(), 0)
self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
def test_save_manifest_text(self):
keep = ArvadosFileWriterTestCase.MockKeep({})
with Collection(keep_client=keep) as c:
- writer = c.open("count.txt", "w")
- writer.write("0123456789")
+ writer = c.open("count.txt", "wb")
+ writer.write(b"0123456789")
self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
def test_get_manifest_text_commits(self):
keep = ArvadosFileWriterTestCase.MockKeep({})
with Collection(keep_client=keep) as c:
- writer = c.open("count.txt", "w")
+ writer = c.open("count.txt", "wb")
writer.write("0123456789")
self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
def test_write_in_middle(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- self.assertEqual("0123456789", writer.readfrom(0, 13))
+ writer = c.open("count.txt", "rb+")
+ self.assertEqual(b"0123456789", writer.readfrom(0, 13))
writer.seek(3, os.SEEK_SET)
writer.write("foo")
self.assertEqual(writer.size(), 10)
- self.assertEqual("012foo6789", writer.readfrom(0, 13))
+ self.assertEqual(b"012foo6789", writer.readfrom(0, 13))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
def test_write_at_end(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- self.assertEqual("0123456789", writer.readfrom(0, 13))
+ writer = c.open("count.txt", "rb+")
+ self.assertEqual(b"0123456789", writer.readfrom(0, 13))
writer.seek(7, os.SEEK_SET)
writer.write("foo")
self.assertEqual(writer.size(), 10)
- self.assertEqual("0123456foo", writer.readfrom(0, 13))
+ self.assertEqual(b"0123456foo", writer.readfrom(0, 13))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
def test_write_across_segment_boundary(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt 0:10:count.txt\n',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- self.assertEqual("012345678901234", writer.readfrom(0, 15))
+ writer = c.open("count.txt", "rb+")
+ self.assertEqual(b"012345678901234", writer.readfrom(0, 15))
writer.seek(7, os.SEEK_SET)
writer.write("foobar")
self.assertEqual(writer.size(), 20)
- self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
+ self.assertEqual(b"0123456foobar34", writer.readfrom(0, 15))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
def test_write_across_several_segments(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": b"0123456789"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:4:count.txt 0:4:count.txt 0:4:count.txt',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- self.assertEqual("012301230123", writer.readfrom(0, 15))
+ writer = c.open("count.txt", "rb+")
+ self.assertEqual(b"012301230123", writer.readfrom(0, 15))
writer.seek(2, os.SEEK_SET)
writer.write("abcdefg")
self.assertEqual(writer.size(), 12)
- self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
+ self.assertEqual(b"01abcdefg123", writer.readfrom(0, 15))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
def test_write_large(self):
"portable_data_hash":"9132ca8e3f671c76103a38f5bc24328c+108"})
with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
+ writer = c.open("count.txt", "rb+")
text = "0123456789" * 100
- for b in xrange(0, 100000):
+ for b in range(0, 100000):
writer.write(text)
self.assertEqual(writer.size(), 100000000)
api = ArvadosFileWriterTestCase.MockApi({}, {})
with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
+ writer = c.open("count.txt", "rb+")
self.assertEqual(writer.size(), 0)
text = "0123456789"
keep = ArvadosFileWriterTestCase.MockKeep({})
with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- for b in xrange(0, 10):
+ writer = c.open("count.txt", "rb+")
+ for b in range(0, 10):
writer.seek(0, os.SEEK_SET)
writer.write("0123456789")
self.assertEqual(writer.size(), 10)
- self.assertEqual("0123456789", writer.readfrom(0, 20))
+ self.assertEqual(b"0123456789", writer.readfrom(0, 20))
self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
writer.flush()
self.assertEqual(writer.size(), 10)
- self.assertEqual("0123456789", writer.readfrom(0, 20))
+ self.assertEqual(b"0123456789", writer.readfrom(0, 20))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
def test_rewrite_append_existing_file(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- for b in xrange(0, 10):
+ writer = c.open("count.txt", "rb+")
+ for b in range(0, 10):
writer.seek(10, os.SEEK_SET)
writer.write("abcdefghij")
self.assertEqual(writer.size(), 20)
- self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
+ self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
writer.arvadosfile.flush()
self.assertEqual(writer.size(), 20)
- self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
+ self.assertEqual(b"0123456789abcdefghij", writer.readfrom(0, 20))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
def test_rewrite_over_existing_file(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt',
keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- for b in xrange(0, 10):
+ writer = c.open("count.txt", "rb+")
+ for b in range(0, 10):
writer.seek(5, os.SEEK_SET)
writer.write("abcdefghij")
self.assertEqual(writer.size(), 15)
- self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
+ self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
writer.arvadosfile.flush()
self.assertEqual(writer.size(), 15)
- self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
+ self.assertEqual(b"01234abcdefghij", writer.readfrom(0, 20))
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
def test_write_large_rewrite(self):
"portable_data_hash":"217665c6b713e1b78dfba7ebd42344db+156"})
with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "r+")
- text = ''.join(["0123456789" for a in xrange(0, 100)])
- for b in xrange(0, 100000):
+ writer = c.open("count.txt", "rb+")
+ text = b''.join([b"0123456789" for a in range(0, 100)])
+ for b in range(0, 100000):
writer.write(text)
writer.seek(0, os.SEEK_SET)
writer.write("foo")
def test_create(self):
keep = ArvadosFileWriterTestCase.MockKeep({})
- api = ArvadosFileWriterTestCase.MockApi({"name":"test_create",
- "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
- "replication_desired":None},
- {"uuid":"zzzzz-4zz18-mockcollection0",
- "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
- "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
+ api = ArvadosFileWriterTestCase.MockApi({
+ "name":"test_create",
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+ "replication_desired":None,
+ }, {
+ "uuid":"zzzzz-4zz18-mockcollection0",
+ "manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 0:8:count.txt\n",
+ "portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51",
+ })
with Collection(api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "w+")
+ writer = c.open("count.txt", "wb+")
self.assertEqual(writer.size(), 0)
writer.write("01234567")
self.assertEqual(writer.size(), 8)
c.save_new("test_create")
self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
self.assertFalse(c.modified())
- self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+ self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
def test_create_subdir(self):
"portable_data_hash":"1b02aaa62528d28a5be41651cbb9d7c7+59"})
with Collection(api_client=api, keep_client=keep) as c:
self.assertIsNone(c.api_response())
- writer = c.open("foo/bar/count.txt", "w+")
+ writer = c.open("foo/bar/count.txt", "wb+")
writer.write("01234567")
self.assertFalse(c.committed())
c.save_new("test_create")
"portable_data_hash":"7a461a8c58601798f690f8b368ac4423+51"})
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n',
api_client=api, keep_client=keep) as c:
- writer = c.open("count.txt", "w+")
+ writer = c.open("count.txt", "wb+")
self.assertEqual(writer.size(), 0)
writer.write("01234567")
self.assertEqual(writer.size(), 8)
def test_file_not_found(self):
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
with self.assertRaises(IOError):
- writer = c.open("nocount.txt", "r")
+ writer = c.open("nocount.txt", "rb")
def test_cannot_open_directory(self):
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
with self.assertRaises(IOError):
- writer = c.open(".", "r")
+ writer = c.open(".", "rb")
def test_create_multiple(self):
keep = ArvadosFileWriterTestCase.MockKeep({})
"manifest_text":". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:8:count1.txt 8:8:count2.txt\n",
"portable_data_hash":"71e7bb6c00d31fc2b4364199fd97be08+102"})
with Collection(api_client=api, keep_client=keep) as c:
- w1 = c.open("count1.txt", "w")
- w2 = c.open("count2.txt", "w")
+ w1 = c.open("count1.txt", "wb")
+ w2 = c.open("count2.txt", "wb")
w1.write("01234567")
w2.write("abcdefgh")
self.assertEqual(w1.size(), 8)
c.save_new("test_create_multiple")
self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
self.assertFalse(c.modified())
- self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+ self.assertEqual(b"01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
stream = []
n = 0
blocks = {}
- for d in ['01234', '34567', '67890']:
+ for d in [b'01234', b'34567', b'67890']:
loc = tutil.str_keep_locator(d)
blocks[loc] = d
stream.append(Range(loc, n, len(d)))
n += len(d)
af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), "count.txt", stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
- return ArvadosFileReader(af)
+ return ArvadosFileReader(af, mode="rb")
def test_read_block_crossing_behavior(self):
# read() needs to return all the data requested if possible, even if it
# crosses uncached blocks: https://arvados.org/issues/5856
sfile = self.make_count_reader(nocache=True)
- self.assertEqual('12345678', sfile.read(8))
+ self.assertEqual(b'12345678', sfile.read(8))
def test_successive_reads(self):
# Override StreamFileReaderTestCase.test_successive_reads
sfile = self.make_count_reader(nocache=True)
- self.assertEqual('1234', sfile.read(4))
- self.assertEqual('5678', sfile.read(4))
- self.assertEqual('9', sfile.read(4))
- self.assertEqual('', sfile.read(4))
+ self.assertEqual(b'1234', sfile.read(4))
+ self.assertEqual(b'5678', sfile.read(4))
+ self.assertEqual(b'9', sfile.read(4))
+ self.assertEqual(b'', sfile.read(4))
def test_tell_after_block_read(self):
# Override StreamFileReaderTestCase.test_tell_after_block_read
sfile = self.make_count_reader(nocache=True)
- self.assertEqual('12345678', sfile.read(8))
+ self.assertEqual(b'12345678', sfile.read(8))
self.assertEqual(8, sfile.tell())
def test_prefetch(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "2e9ec317e197819358fbc43afca7d837+8": b"01234567",
+ "e8dc4081b13434b45189a720b77b6818+8": b"abcdefgh",
+ })
with Collection(". 2e9ec317e197819358fbc43afca7d837+8 e8dc4081b13434b45189a720b77b6818+8 0:16:count.txt\n", keep_client=keep) as c:
- r = c.open("count.txt", "r")
- self.assertEqual("0123", r.read(4))
+ r = c.open("count.txt", "rb")
+ self.assertEqual(b"0123", r.read(4))
self.assertIn("2e9ec317e197819358fbc43afca7d837+8", keep.requests)
self.assertIn("e8dc4081b13434b45189a720b77b6818+8", keep.requests)
def test__eq__from_writes(self):
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
with Collection() as c2:
- f = c2.open("count1.txt", "w")
+ f = c2.open("count1.txt", "wb")
f.write("0123456789")
self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
def test__ne__(self):
with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
with Collection() as c2:
- f = c2.open("count1.txt", "w")
+ f = c2.open("count1.txt", "wb")
f.write("1234567890")
self.assertTrue(c1["count1.txt"] != c2["count1.txt"])
af = ArvadosFile(col, "test",
stream=stream,
segments=segments)
+ kwargs.setdefault('mode', 'rb')
return ArvadosFileReader(af, **kwargs)
def read_for_test(self, reader, byte_count, **kwargs):
class ArvadosFileReadAllTestCase(ArvadosFileReadTestCase):
def read_for_test(self, reader, byte_count, **kwargs):
- return ''.join(reader.readall(**kwargs))
+ return b''.join(reader.readall(**kwargs))
class ArvadosFileReadAllDecompressedTestCase(ArvadosFileReadTestCase):
def read_for_test(self, reader, byte_count, **kwargs):
- return ''.join(reader.readall_decompressed(**kwargs))
+ return b''.join(reader.readall_decompressed(**kwargs))
class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
def read_for_test(self, reader, byte_count, **kwargs):
- return ''.join(reader.readlines(**kwargs))
+ return ''.join(reader.readlines(**kwargs)).encode()
class ArvadosFileTestCase(unittest.TestCase):
bufferblock.append("foo")
self.assertEqual(bufferblock.size(), 3)
- self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+ self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
bufferblock.append("bar")
self.assertEqual(bufferblock.size(), 6)
- self.assertEqual(bufferblock.buffer_view[0:6], "foobar")
+ self.assertEqual(bufferblock.buffer_view[0:6], b"foobar")
self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
bufferblock.append("foo")
self.assertEqual(bufferblock.size(), 3)
- self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+ self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
bufferblock2.append("bar")
self.assertEqual(bufferblock2.size(), 6)
- self.assertEqual(bufferblock2.buffer_view[0:6], "foobar")
+ self.assertEqual(bufferblock2.buffer_view[0:6], b"foobar")
self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
self.assertEqual(bufferblock.size(), 3)
- self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+ self.assertEqual(bufferblock.buffer_view[0:3], b"foo")
self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
def test_bufferblock_get(self):
- keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
+ keep = ArvadosFileWriterTestCase.MockKeep({
+ "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
+ })
with arvados.arvfile._BlockManager(keep) as blockmanager:
bufferblock = blockmanager.alloc_bufferblock()
bufferblock.append("foo")
- self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), "0123456789")
- self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), "foo")
+ self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), b"0123456789")
+ self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), b"foo")
def test_bufferblock_commit(self):
mockkeep = mock.MagicMock()
+from __future__ import absolute_import
import arvados
import sys
-import run_test_server
-import arvados_testutil as tutil
-import manifest_examples
-from performance.performance_profiler import profiled
+from . import run_test_server
+from . import arvados_testutil as tutil
+from . import manifest_examples
+from .performance.performance_profiler import profiled
class CollectionBenchmark(run_test_server.TestCaseWithServers,
tutil.ArvadosBaseTestCase,
dst = arvados.collection.Collection()
with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
for name in self.list_recursive(src):
- with src.open(name) as srcfile, dst.open(name, 'w') as dstfile:
+ with src.open(name, 'rb') as srcfile, dst.open(name, 'wb') as dstfile:
dstfile.write(srcfile.read())
dst.save_new()
from __future__ import print_function
+from __future__ import absolute_import
-import md5
+from builtins import str
+from builtins import range
+import hashlib
import mock
import os
import random
import threading
import unittest
-import arvados.cache
import arvados
-import run_test_server
+import arvados.cache
+from . import run_test_server
def _random(n):
- return bytearray(random.getrandbits(8) for _ in xrange(n))
+ return bytearray(random.getrandbits(8) for _ in range(n))
class CacheTestThread(threading.Thread):
for x in range(16):
try:
data_in = _random(128)
- data_in = md5.new(data_in).hexdigest() + "\n" + str(data_in)
+ data_in = hashlib.md5(data_in).hexdigest().encode() + b"\n" + data_in
c.set(url, data_in)
data_out = c.get(url)
- digest, content = data_out.split("\n", 1)
- if digest != md5.new(content).hexdigest():
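+                # The cache round-trips bytes, so split on b"\n" and compare
+                # the digests as bytes as well.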
+ digest, _, content = data_out.partition(b"\n")
+ if digest != hashlib.md5(content).hexdigest().encode():
self.ok = False
except Exception as err:
self.ok = False
- print("cache failed: {}".format(err), file=sys.stderr)
+ print("cache failed: {}: {}".format(type(err), err), file=sys.stderr)
+ raise
class CacheTest(unittest.TestCase):
-# usage example:
-#
-# ARVADOS_API_TOKEN=abc ARVADOS_API_HOST=arvados.local python -m unittest discover
+from __future__ import absolute_import
+from builtins import object
import arvados
import copy
import mock
import os
import pprint
import re
+import sys
import tempfile
import unittest
-import run_test_server
+from . import run_test_server
from arvados._ranges import Range, LocatorAndRange
from arvados.collection import Collection, CollectionReader
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
class TestResumableWriter(arvados.ResumableCollectionWriter):
KEEP_BLOCK_SIZE = 1024 # PUT to Keep every 1K.
self.assertEqual(cw.current_stream_name(), '.',
'current_stream_name() should be "." now')
cw.set_current_file_name('foo.txt')
- cw.write('foo')
+ cw.write(b'foo')
self.assertEqual(cw.current_file_name(), 'foo.txt',
'current_file_name() should be foo.txt now')
cw.start_new_file('bar.txt')
- cw.write('bar')
+ cw.write(b'bar')
cw.start_new_stream('baz')
- cw.write('baz')
+ cw.write(b'baz')
cw.set_current_file_name('baz.txt')
self.assertEqual(cw.manifest_text(),
". 3858f62230ac3c915f300c664312c63f+6 0:3:foo.txt 3:3:bar.txt\n" +
return cw.portable_data_hash()
def test_keep_local_store(self):
- self.assertEqual(self.keep_client.put('foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
- self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), 'foo', 'wrong data from Keep.get')
+ self.assertEqual(self.keep_client.put(b'foo'), 'acbd18db4cc2f85cedef654fccc4a4d8+3', 'wrong md5 hash from Keep.put')
+ self.assertEqual(self.keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3'), b'foo', 'wrong data from Keep.get')
def test_local_collection_writer(self):
self.assertEqual(self.write_foo_bar_baz(),
for s in cr.all_streams():
for f in s.all_files():
got += [[f.size(), f.stream_name(), f.name(), f.read(2**26)]]
- expected = [[3, '.', 'foo.txt', 'foo'],
- [3, '.', 'bar.txt', 'bar'],
- [3, './baz', 'baz.txt', 'baz']]
+ expected = [[3, '.', 'foo.txt', b'foo'],
+ [3, '.', 'bar.txt', b'bar'],
+ [3, './baz', 'baz.txt', b'baz']]
self.assertEqual(got,
expected)
stream0 = cr.all_streams()[0]
self.assertEqual(stream0.readfrom(0, 0),
- '',
+ b'',
'reading zero bytes should have returned empty string')
self.assertEqual(stream0.readfrom(0, 2**26),
- 'foobar',
+ b'foobar',
'reading entire stream failed')
self.assertEqual(stream0.readfrom(2**26, 0),
- '',
+ b'',
'reading zero bytes should have returned empty string')
def _test_subset(self, collection, expected):
def test_collection_manifest_subset(self):
foobarbaz = self.write_foo_bar_baz()
self._test_subset(foobarbaz,
- [[3, '.', 'bar.txt', 'bar'],
- [3, '.', 'foo.txt', 'foo'],
- [3, './baz', 'baz.txt', 'baz']])
+ [[3, '.', 'bar.txt', b'bar'],
+ [3, '.', 'foo.txt', b'foo'],
+ [3, './baz', 'baz.txt', b'baz']])
self._test_subset((". %s %s 0:3:foo.txt 3:3:bar.txt\n" %
- (self.keep_client.put("foo"),
- self.keep_client.put("bar"))),
- [[3, '.', 'bar.txt', 'bar'],
- [3, '.', 'foo.txt', 'foo']])
+ (self.keep_client.put(b"foo"),
+ self.keep_client.put(b"bar"))),
+ [[3, '.', 'bar.txt', b'bar'],
+ [3, '.', 'foo.txt', b'foo']])
self._test_subset((". %s %s 0:2:fo.txt 2:4:obar.txt\n" %
- (self.keep_client.put("foo"),
- self.keep_client.put("bar"))),
- [[2, '.', 'fo.txt', 'fo'],
- [4, '.', 'obar.txt', 'obar']])
+ (self.keep_client.put(b"foo"),
+ self.keep_client.put(b"bar"))),
+ [[2, '.', 'fo.txt', b'fo'],
+ [4, '.', 'obar.txt', b'obar']])
self._test_subset((". %s %s 0:2:fo.txt 2:0:zero.txt 2:2:ob.txt 4:2:ar.txt\n" %
- (self.keep_client.put("foo"),
- self.keep_client.put("bar"))),
- [[2, '.', 'ar.txt', 'ar'],
- [2, '.', 'fo.txt', 'fo'],
- [2, '.', 'ob.txt', 'ob'],
- [0, '.', 'zero.txt', '']])
+ (self.keep_client.put(b"foo"),
+ self.keep_client.put(b"bar"))),
+ [[2, '.', 'ar.txt', b'ar'],
+ [2, '.', 'fo.txt', b'fo'],
+ [2, '.', 'ob.txt', b'ob'],
+ [0, '.', 'zero.txt', b'']])
def test_collection_empty_file(self):
cw = arvados.CollectionWriter(self.api_client)
cw.start_new_file('zero.txt')
- cw.write('')
+ cw.write(b'')
self.assertEqual(cw.manifest_text(), ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:zero.txt\n")
self.check_manifest_file_sizes(cw.manifest_text(), [0])
cw = arvados.CollectionWriter(self.api_client)
cw.start_new_file('zero.txt')
- cw.write('')
+ cw.write(b'')
cw.start_new_file('one.txt')
- cw.write('1')
+ cw.write(b'1')
cw.start_new_stream('foo')
cw.start_new_file('zero.txt')
- cw.write('')
+ cw.write(b'')
self.check_manifest_file_sizes(cw.manifest_text(), [0,1,0])
def test_no_implicit_normalize(self):
cw = arvados.CollectionWriter(self.api_client)
cw.start_new_file('b')
- cw.write('b')
+ cw.write(b'b')
cw.start_new_file('a')
- cw.write('')
+ cw.write(b'')
self.check_manifest_file_sizes(cw.manifest_text(), [1,0])
self.check_manifest_file_sizes(
arvados.CollectionReader(
return self.content[locator]
def test_stream_reader(self):
- keepblocks = {'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': 'abcdefghij',
- 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': 'klmnopqrstuvwxy',
- 'cccccccccccccccccccccccccccccccc+5': 'z0123'}
+ keepblocks = {
+ 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10': b'abcdefghij',
+ 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15': b'klmnopqrstuvwxy',
+ 'cccccccccccccccccccccccccccccccc+5': b'z0123',
+ }
mk = self.MockKeep(keepblocks)
sr = arvados.StreamReader([".", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+10", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+15", "cccccccccccccccccccccccccccccccc+5", "0:30:foo"], mk)
- content = 'abcdefghijklmnopqrstuvwxyz0123456789'
+ content = b'abcdefghijklmnopqrstuvwxyz0123456789'
self.assertEqual(sr.readfrom(0, 30), content[0:30])
self.assertEqual(sr.readfrom(2, 30), content[2:30])
self.assertEqual(sr.readfrom(15, 5), content[15:20])
self.assertEqual(sr.readfrom(20, 5), content[20:25])
self.assertEqual(sr.readfrom(25, 5), content[25:30])
- self.assertEqual(sr.readfrom(30, 5), '')
+ self.assertEqual(sr.readfrom(30, 5), b'')
def test_extract_file(self):
m1 = """. 5348b82a029fd9e971a811ce1f71360b+43 0:43:md5sum.txt
def test_write_multiple_files(self):
cwriter = arvados.CollectionWriter(self.api_client)
for letter in 'ABC':
- with self.make_test_file(letter) as testfile:
+ with self.make_test_file(letter.encode()) as testfile:
cwriter.write_file(testfile.name, letter)
self.assertEqual(
cwriter.manifest_text(),
with self.make_test_file() as testfile:
cwriter.write_file(testfile.name, 'test')
orig_mtime = os.fstat(testfile.fileno()).st_mtime
- testfile.write('extra')
+ testfile.write(b'extra')
testfile.flush()
os.utime(testfile.name, (orig_mtime, orig_mtime))
self.assertRaises(arvados.errors.StaleWriterStateError,
@tutil.skip_sleep
class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
- def mock_get_collection(self, api_mock, code, body):
- body = self.API_COLLECTIONS.get(body)
+ def mock_get_collection(self, api_mock, code, fixturename):
+ body = self.API_COLLECTIONS.get(fixturename)
self._mock_api_call(api_mock.collections().get, code, body)
def api_client_mock(self, status=200):
reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
num_retries=3)
with tutil.mock_keep_responses('foo', 500, 500, 200):
- self.assertEqual('foo',
- ''.join(f.read(9) for f in reader.all_files()))
+ self.assertEqual(b'foo',
+ b''.join(f.read(9) for f in reader.all_files()))
def test_read_nonnormalized_manifest_with_collection_reader(self):
# client should be able to use CollectionReader on a manifest without normalizing it
def test_open_collection_file_one_argument(self):
client = self.api_client_mock(200)
reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client)
- cfile = reader.open('./foo')
+ cfile = reader.open('./foo', 'rb')
self.check_open_file(cfile, '.', 'foo', 3)
def test_open_deep_file(self):
self.mock_get_collection(client, 200, coll_name)
reader = arvados.CollectionReader(
self.API_COLLECTIONS[coll_name]['uuid'], api_client=client)
- cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt')
+ cfile = reader.open('./subdir2/subdir3/file2_in_subdir3.txt', 'rb')
self.check_open_file(cfile, './subdir2/subdir3', 'file2_in_subdir3.txt',
32)
kwargs.setdefault('api_client', self.api_client_mock())
writer = arvados.CollectionWriter(**kwargs)
writer.start_new_file('foo')
- writer.write('foo')
+ writer.write(b'foo')
return writer
def test_write_whole_collection(self):
with writer.open('out') as out_file:
self.assertEqual('.', writer.current_stream_name())
self.assertEqual('out', writer.current_file_name())
- out_file.write('test data')
+ out_file.write(b'test data')
data_loc = tutil.str_keep_locator('test data')
self.assertTrue(out_file.closed, "writer file not closed after context")
self.assertRaises(ValueError, out_file.write, 'extra text')
with self.mock_keep((data_loc1, 200), (data_loc2, 200)) as keep_mock:
writer = arvados.CollectionWriter(client)
with writer.open('flush_test') as out_file:
- out_file.write('flush1')
+ out_file.write(b'flush1')
out_file.flush()
- out_file.write('flush2')
+ out_file.write(b'flush2')
self.assertEqual(". {} {} 0:12:flush_test\n".format(data_loc1,
data_loc2),
writer.manifest_text())
client = self.api_client_mock()
writer = arvados.CollectionWriter(client)
with writer.open('.', '1') as out_file:
- out_file.write('1st')
+ out_file.write(b'1st')
with writer.open('.', '2') as out_file:
- out_file.write('2nd')
+ out_file.write(b'2nd')
data_loc = tutil.str_keep_locator('1st2nd')
with self.mock_keep(data_loc, 200) as keep_mock:
self.assertEqual(". {} 0:3:1 3:3:2\n".format(data_loc),
with self.mock_keep((data_loc1, 200), (data_loc2, 200)) as keep_mock:
writer = arvados.CollectionWriter(client)
with writer.open('file') as out_file:
- out_file.write('file')
+ out_file.write(b'file')
with writer.open('./dir', 'indir') as out_file:
- out_file.write('indir')
+ out_file.write(b'indir')
expected = ". {} 0:4:file\n./dir {} 0:5:indir\n".format(
data_loc1, data_loc2)
self.assertEqual(expected, writer.manifest_text())
self.assertRaises(arvados.errors.AssertionError, writer.open, 'two')
+class CollectionOpenModes(run_test_server.TestCaseWithServers):
+
+ def test_open_binary_modes(self):
+ c = Collection()
+ for mode in ['wb', 'wb+', 'ab', 'ab+']:
+            with c.open('foo', mode) as f:
+ f.write(b'foo')
+
+ def test_open_invalid_modes(self):
+ c = Collection()
+ for mode in ['+r', 'aa', '++', 'r+b', 'beer', '', None]:
+ with self.assertRaises(Exception):
+ c.open('foo', mode)
+
+ def test_open_text_modes(self):
+ c = Collection()
+ with c.open('foo', 'wb') as f:
+ f.write('foo')
+ for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
+ if sys.version_info >= (3, 0):
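+                # Text-mode collection files are not implemented on Python 3.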
+ with self.assertRaises(NotImplementedError):
+ c.open('foo', mode)
+ else:
+ with c.open('foo', mode) as f:
+ if mode[0] == 'r' and '+' not in mode:
+ self.assertEqual('foo', f.read(3))
+ else:
+ f.write('bar')
+ f.seek(-3, os.SEEK_CUR)
+ self.assertEqual('bar', f.read(3))
+
+
class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
def test_replication_desired_kept_on_load(self):
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
d = c2.diff(c1)
- self.assertEqual(d, [('del', './count2.txt', c2["count2.txt"]),
- ('add', './count1.txt', c1["count1.txt"])])
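+        # Compare sorted lists so the check does not depend on dict
+        # iteration order.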
+ self.assertEqual(sorted(d), [
+ ('add', './count1.txt', c1["count1.txt"]),
+ ('del', './count2.txt', c2["count2.txt"]),
+ ])
d = c1.diff(c2)
- self.assertEqual(d, [('del', './count1.txt', c1["count1.txt"]),
- ('add', './count2.txt', c2["count2.txt"])])
+ self.assertEqual(sorted(d), [
+ ('add', './count2.txt', c2["count2.txt"]),
+ ('del', './count1.txt', c1["count1.txt"]),
+ ])
self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
c1.apply(d)
self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt 10:20:count2.txt\n')
d = c2.diff(c1)
- self.assertEqual(d, [('del', './count2.txt', c2["count2.txt"]),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('del', './count2.txt', c2["count2.txt"]),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
d = c1.diff(c2)
- self.assertEqual(d, [('add', './count2.txt', c2["count2.txt"]),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('add', './count2.txt', c2["count2.txt"]),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
c1.apply(d)
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
d = c2.diff(c1)
- self.assertEqual(d, [('del', './foo', c2["foo"]),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('del', './foo', c2["foo"]),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
d = c1.diff(c2)
- self.assertEqual(d, [('add', './foo', c2["foo"]),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
-
+ self.assertEqual(sorted(d), [
+ ('add', './foo', c2["foo"]),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
c1.apply(d)
self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
def test_diff_del_add_in_subcollection(self):
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:3:count3.txt\n')
-
d = c2.diff(c1)
- self.assertEqual(d, [('del', './foo/count3.txt', c2.find("foo/count3.txt")),
- ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('add', './foo/count2.txt', c1.find("foo/count2.txt")),
+ ('del', './foo/count3.txt', c2.find("foo/count3.txt")),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
d = c1.diff(c2)
- self.assertEqual(d, [('del', './foo/count2.txt', c1.find("foo/count2.txt")),
- ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('add', './foo/count3.txt', c2.find("foo/count3.txt")),
+ ('del', './foo/count2.txt', c1.find("foo/count2.txt")),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
c1.apply(d)
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
c2 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:3:foo\n')
d = c2.diff(c1)
- self.assertEqual(d, [('mod', './foo', c2["foo"], c1["foo"]),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('mod', './foo', c2["foo"], c1["foo"]),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
d = c1.diff(c2)
- self.assertEqual(d, [('mod', './foo', c1["foo"], c2["foo"]),
- ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"])])
+ self.assertEqual(sorted(d), [
+ ('mod', './foo', c1["foo"], c2["foo"]),
+ ('tok', './count1.txt', c2["count1.txt"], c1["count1.txt"]),
+ ])
self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
c1.apply(d)
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
d = c1.diff(c2)
- self.assertEqual(d, [('del', './count1.txt', c1["count1.txt"]),
- ('add', './count2.txt', c2["count2.txt"])])
- f = c1.open("count1.txt", "w")
- f.write("zzzzz")
+ self.assertEqual(sorted(d), [
+ ('add', './count2.txt', c2["count2.txt"]),
+ ('del', './count1.txt', c1["count1.txt"]),
+ ])
+ f = c1.open("count1.txt", "wb")
+ f.write(b"zzzzz")
# c1 changed, so it should not be deleted.
c1.apply(d)
c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
d = c1.diff(c2)
self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
- f = c1.open("count1.txt", "w")
- f.write("zzzzz")
+ f = c1.open("count1.txt", "wb")
+ f.write(b"zzzzz")
# c1 changed, so c2 mod will go to a conflict file
c1.apply(d)
- self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+ self.assertRegex(
+ c1.portable_manifest_text(),
+ r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
def test_conflict_add(self):
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt\n')
d = c1.diff(c2)
- self.assertEqual(d, [('del', './count2.txt', c1["count2.txt"]),
- ('add', './count1.txt', c2["count1.txt"])])
- f = c1.open("count1.txt", "w")
- f.write("zzzzz")
+ self.assertEqual(sorted(d), [
+ ('add', './count1.txt', c2["count1.txt"]),
+ ('del', './count2.txt', c1["count2.txt"]),
+ ])
+ f = c1.open("count1.txt", "wb")
+ f.write(b"zzzzz")
# c1 added count1.txt, so c2 add will go to a conflict file
c1.apply(d)
- self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+ self.assertRegex(
+ c1.portable_manifest_text(),
+ r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
def test_conflict_del(self):
c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
# c1 deleted, so c2 mod will go to a conflict file
c1.apply(d)
- self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+ self.assertRegex(
+ c1.portable_manifest_text(),
+ r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
def test_notify(self):
c1 = Collection()
events = []
c1.subscribe(lambda event, collection, name, item: events.append((event, collection, name, item)))
- f = c1.open("foo.txt", "w")
+ f = c1.open("foo.txt", "wb")
self.assertEqual(events[0], (arvados.collection.ADD, c1, "foo.txt", f.arvadosfile))
def test_open_w(self):
c1 = Collection(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n")
self.assertEqual(c1["count1.txt"].size(), 10)
- c1.open("count1.txt", "w").close()
+ c1.open("count1.txt", "wb").close()
self.assertEqual(c1["count1.txt"].size(), 0)
class NewCollectionTestCaseWithServers(run_test_server.TestCaseWithServers):
def test_get_manifest_text_only_committed(self):
c = Collection()
- with c.open("count.txt", "w") as f:
+ with c.open("count.txt", "wb") as f:
# One file committed
- with c.open("foo.txt", "w") as foo:
- foo.write("foo")
+ with c.open("foo.txt", "wb") as foo:
+ foo.write(b"foo")
foo.flush() # Force block commit
- f.write("0123456789")
+ f.write(b"0123456789")
# Other file not committed. Block not written to keep yet.
self.assertEqual(
c._get_manifest_text(".",
def test_only_small_blocks_are_packed_together(self):
c = Collection()
# Write a couple of small files,
- f = c.open("count.txt", "w")
- f.write("0123456789")
+ f = c.open("count.txt", "wb")
+ f.write(b"0123456789")
f.close(flush=False)
- foo = c.open("foo.txt", "w")
- foo.write("foo")
+ foo = c.open("foo.txt", "wb")
+ foo.write(b"foo")
foo.close(flush=False)
# Then, write a big file, it shouldn't be packed with the ones above
- big = c.open("bigfile.txt", "w")
- big.write("x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
+ big = c.open("bigfile.txt", "wb")
+ big.write(b"x" * 1024 * 1024 * 33) # 33 MB > KEEP_BLOCK_SIZE/2
big.close(flush=False)
self.assertEqual(
c.manifest_text("."),
self.assertEqual(c.portable_data_hash(), "d41d8cd98f00b204e9800998ecf8427e+0")
self.assertEqual(c.api_response()["portable_data_hash"], "d41d8cd98f00b204e9800998ecf8427e+0" )
- with c.open("count.txt", "w") as f:
- f.write("0123456789")
+ with c.open("count.txt", "wb") as f:
+ f.write(b"0123456789")
self.assertEqual(c.portable_manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
def test_create_and_save(self):
c = self.create_count_txt()
c.save()
- self.assertRegexpMatches(c.manifest_text(), r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+ self.assertRegex(
+ c.manifest_text(),
+ r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
def test_create_and_save_new(self):
c = self.create_count_txt()
c.save_new()
- self.assertRegexpMatches(c.manifest_text(), r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+ self.assertRegex(
+ c.manifest_text(),
+ r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
def test_create_diff_apply(self):
c1 = self.create_count_txt()
c1.save()
c2 = Collection(c1.manifest_locator())
- with c2.open("count.txt", "w") as f:
- f.write("abcdefg")
+ with c2.open("count.txt", "wb") as f:
+ f.write(b"abcdefg")
diff = c1.diff(c2)
c1.save()
c2 = arvados.collection.Collection(c1.manifest_locator())
- with c2.open("count.txt", "w") as f:
- f.write("abcdefg")
+ with c2.open("count.txt", "wb") as f:
+ f.write(b"abcdefg")
c2.save()
c1 = self.create_count_txt()
c1.save()
- with c1.open("count.txt", "w") as f:
- f.write("XYZ")
+ with c1.open("count.txt", "wb") as f:
+ f.write(b"XYZ")
c2 = arvados.collection.Collection(c1.manifest_locator())
- with c2.open("count.txt", "w") as f:
- f.write("abcdefg")
+ with c2.open("count.txt", "wb") as f:
+ f.write(b"abcdefg")
c2.save()
c1.update()
- self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+ self.assertRegex(
+ c1.manifest_text(),
+ r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
if __name__ == '__main__':
-#!/usr/bin/env python
-
+from __future__ import absolute_import
import traceback
import unittest
import arvados.errors as arv_error
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
class KeepRequestErrorTestCase(unittest.TestCase):
REQUEST_ERRORS = [
message = "test plain traceback string"
test_exc = arv_error.KeepRequestError(message)
exc_report = self.traceback_str(test_exc)
- self.assertTrue(exc_report.startswith("KeepRequestError: "))
+ self.assertRegex(exc_report, r"^(arvados\.errors\.)?KeepRequestError: ")
self.assertIn(message, exc_report)
def test_traceback_str_with_request_errors(self):
message = "test traceback shows Keep services"
test_exc = arv_error.KeepRequestError(message, self.REQUEST_ERRORS[:])
exc_report = self.traceback_str(test_exc)
- self.assertTrue(exc_report.startswith("KeepRequestError: "))
- for expect_substr in [message, "raised IOError", "raised MemoryError",
- "test MemoryError", "second test IOError",
- "responded with 500 Internal Server Error"]:
- self.assertIn(expect_substr, exc_report)
+ self.assertRegex(exc_report, r"^(arvados\.errors\.)?KeepRequestError: ")
+ self.assertIn(message, exc_report)
+ for expect_re in [
+ r"raised (IOError|OSError)", # IOError in Python2, OSError in Python3
+ r"raised MemoryError",
+ r"test MemoryError",
+ r"second test IOError",
+ r"responded with 500 Internal Server Error"]:
+ self.assertRegex(exc_report, expect_re)
# Assert the report maintains order of listed services.
last_index = -1
for service_key, _ in self.REQUEST_ERRORS:
-import arvados
-import io
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
+from builtins import object
import logging
import mock
-import Queue
-import run_test_server
+import queue
+import sys
import threading
import time
import unittest
-import arvados_testutil
+import arvados
+from . import arvados_testutil as tutil
+from . import run_test_server
+
class WebsocketTest(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
TIME_PAST = time.time()-3600
TIME_FUTURE = time.time()+3600
- MOCK_WS_URL = 'wss://[{}]/'.format(arvados_testutil.TEST_HOST)
+ MOCK_WS_URL = 'wss://[{}]/'.format(tutil.TEST_HOST)
TEST_TIMEOUT = 10.0
def _test_subscribe(self, poll_fallback, expect_type, start_time=None, expected=1):
run_test_server.authorize_with('active')
- events = Queue.Queue(100)
+ events = queue.Queue(100)
# Create ancestor before subscribing.
# When listening with start_time in the past, this should also be retrieved.
log_object_uuids.append(events.get(True, 5)['object_uuid'])
if expected < 2:
- with self.assertRaises(Queue.Empty):
+ with self.assertRaises(queue.Empty):
# assertEqual just serves to show us what unexpected
# thing comes out of the queue when the assertRaises
# fails; when the test passes, this assertEqual
error_mock = mock.MagicMock()
error_mock.resp.status = 0
error_mock._get_reason.return_value = "testing"
- api_mock.logs().list().execute.side_effect = (arvados.errors.ApiError(error_mock, ""),
- {"items": [{"id": 1}], "items_available": 1},
- arvados.errors.ApiError(error_mock, ""),
- {"items": [{"id": 1}], "items_available": 1})
+ api_mock.logs().list().execute.side_effect = (
+ arvados.errors.ApiError(error_mock, b""),
+ {"items": [{"id": 1}], "items_available": 1},
+ arvados.errors.ApiError(error_mock, b""),
+ {"items": [{"id": 1}], "items_available": 1},
+ )
pc = arvados.events.PollClient(api_mock, [], on_ev, 15, None)
pc.start()
while len(n) < 2:
return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t))
def localiso(self, t):
- return time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(t)) + self.isotz(-time.timezone/60)
+ return time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(t)) + self.isotz(-time.timezone//60)
def isotz(self, offset):
"""Convert minutes-east-of-UTC to RFC3339- and ISO-compatible time zone designator"""
- return '{:+03d}:{:02d}'.format(offset/60, offset%60)
+ return '{:+03d}:{:02d}'.format(offset//60, offset%60)
     # Test websocket reconnection on (un)expected close
def _test_websocket_reconnect(self, close_unexpected):
run_test_server.authorize_with('active')
- events = Queue.Queue(100)
+ events = queue.Queue(100)
- logstream = io.BytesIO()
+ logstream = tutil.StringIO()
rootLogger = logging.getLogger()
streamHandler = logging.StreamHandler(logstream)
rootLogger.addHandler(streamHandler)
# expect an event
self.assertIn(human['uuid'], events.get(True, 5)['object_uuid'])
- with self.assertRaises(Queue.Empty):
+ with self.assertRaises(queue.Empty):
self.assertEqual(events.get(True, 2), None)
# close (im)properly
event = events.get(True, 5)
if event.get('object_uuid') != None:
log_object_uuids.append(event['object_uuid'])
- with self.assertRaises(Queue.Empty):
+ with self.assertRaises(queue.Empty):
self.assertEqual(events.get(True, 2), None)
self.assertNotIn(human['uuid'], log_object_uuids)
self.assertIn(human2['uuid'], log_object_uuids)
else:
- with self.assertRaises(Queue.Empty):
+ with self.assertRaises(queue.Empty):
self.assertEqual(events.get(True, 2), None)
# verify log message to ensure that an (un)expected close
def test_websocket_reconnect_retry(self, event_client_connect):
event_client_connect.side_effect = [None, Exception('EventClient.connect error'), None]
- logstream = io.BytesIO()
+ logstream = tutil.StringIO()
rootLogger = logging.getLogger()
streamHandler = logging.StreamHandler(logstream)
rootLogger.addHandler(streamHandler)
run_test_server.authorize_with('active')
- events = Queue.Queue(100)
+ events = queue.Queue(100)
filters = [['object_uuid', 'is_a', 'arvados#human']]
self.ws = arvados.events.subscribe(
+from __future__ import absolute_import
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import range
+from builtins import object
import hashlib
import mock
import os
import random
import re
import socket
-import threading
+import sys
import time
import unittest
-import urlparse
+import urllib.parse
import arvados
import arvados.retry
-import arvados_testutil as tutil
-import keepstub
-import run_test_server
+from . import arvados_testutil as tutil
+from . import keepstub
+from . import run_test_server
class KeepTestCase(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
def test_KeepBasicRWTest(self):
self.assertEqual(0, self.keep_client.upload_counter.get())
foo_locator = self.keep_client.put('foo')
- self.assertRegexpMatches(
+ self.assertRegex(
foo_locator,
'^acbd18db4cc2f85cedef654fccc4a4d8\+3',
'wrong md5 hash from Keep.put("foo"): ' + foo_locator)
self.assertEqual(0, self.keep_client.download_counter.get())
self.assertEqual(self.keep_client.get(foo_locator),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
self.assertEqual(3, self.keep_client.download_counter.get())
def test_KeepBinaryRWTest(self):
- blob_str = '\xff\xfe\xf7\x00\x01\x02'
+ blob_str = b'\xff\xfe\xf7\x00\x01\x02'
blob_locator = self.keep_client.put(blob_str)
- self.assertRegexpMatches(
+ self.assertRegex(
blob_locator,
'^7fc7c53b45e53926ba52821140fef396\+6',
('wrong locator from Keep.put(<binarydata>):' + blob_locator))
'wrong content from Keep.get(md5(<binarydata>))')
def test_KeepLongBinaryRWTest(self):
- blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
+ blob_data = b'\xff\xfe\xfd\xfc\x00\x01\x02\x03'
for i in range(0,23):
- blob_str = blob_str + blob_str
- blob_locator = self.keep_client.put(blob_str)
- self.assertRegexpMatches(
+ blob_data = blob_data + blob_data
+ blob_locator = self.keep_client.put(blob_data)
+ self.assertRegex(
blob_locator,
'^84d90fc0d8175dd5dcfab04b999bc956\+67108864',
('wrong locator from Keep.put(<binarydata>): ' + blob_locator))
self.assertEqual(self.keep_client.get(blob_locator),
- blob_str,
+ blob_data,
'wrong content from Keep.get(md5(<binarydata>))')
@unittest.skip("unreliable test - please fix and close #8752")
def test_KeepSingleCopyRWTest(self):
- blob_str = '\xff\xfe\xfd\xfc\x00\x01\x02\x03'
- blob_locator = self.keep_client.put(blob_str, copies=1)
- self.assertRegexpMatches(
+ blob_data = b'\xff\xfe\xfd\xfc\x00\x01\x02\x03'
+ blob_locator = self.keep_client.put(blob_data, copies=1)
+ self.assertRegex(
blob_locator,
'^c902006bc98a3eb4a3663b65ab4a6fab\+8',
('wrong locator from Keep.put(<binarydata>): ' + blob_locator))
self.assertEqual(self.keep_client.get(blob_locator),
- blob_str,
+ blob_data,
'wrong content from Keep.get(md5(<binarydata>))')
def test_KeepEmptyCollectionTest(self):
blob_locator = self.keep_client.put('', copies=1)
- self.assertRegexpMatches(
+ self.assertRegex(
blob_locator,
'^d41d8cd98f00b204e9800998ecf8427e\+0',
('wrong locator from Keep.put(""): ' + blob_locator))
def test_unicode_must_be_ascii(self):
# If unicode type, must only consist of valid ASCII
foo_locator = self.keep_client.put(u'foo')
- self.assertRegexpMatches(
+ self.assertRegex(
foo_locator,
'^acbd18db4cc2f85cedef654fccc4a4d8\+3',
'wrong md5 hash from Keep.put("foo"): ' + foo_locator)
- with self.assertRaises(UnicodeEncodeError):
- # Error if it is not ASCII
- self.keep_client.put(u'\xe2')
+ if sys.version_info < (3, 0):
+ with self.assertRaises(UnicodeEncodeError):
+ # Error if it is not ASCII
+ self.keep_client.put(u'\xe2')
- with self.assertRaises(arvados.errors.ArgumentError):
- # Must be a string type
+ with self.assertRaises(AttributeError):
+ # Must be bytes or have an encode() method
self.keep_client.put({})
def test_KeepHeadTest(self):
locator = self.keep_client.put('test_head')
- self.assertRegexpMatches(
+ self.assertRegex(
locator,
'^b9a772c7049325feb7130fff1f8333e9\+9',
'wrong md5 hash from Keep.put for "test_head": ' + locator)
self.assertEqual(True, self.keep_client.head(locator))
self.assertEqual(self.keep_client.get(locator),
- 'test_head',
+ b'test_head',
'wrong content from Keep.get for "test_head"')
class KeepPermissionTestCase(run_test_server.TestCaseWithServers):
run_test_server.authorize_with('active')
keep_client = arvados.KeepClient()
foo_locator = keep_client.put('foo')
- self.assertRegexpMatches(
+ self.assertRegex(
foo_locator,
r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
'invalid locator from Keep.put("foo"): ' + foo_locator)
self.assertEqual(keep_client.get(foo_locator),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
# GET with an unsigned locator => NotFound
bar_locator = keep_client.put('bar')
unsigned_bar_locator = "37b51d194a7513e45b56f6524f2d51f2+3"
- self.assertRegexpMatches(
+ self.assertRegex(
bar_locator,
r'^37b51d194a7513e45b56f6524f2d51f2\+3\+A[a-f0-9]+@[a-f0-9]+$',
'invalid locator from Keep.put("bar"): ' + bar_locator)
def _put_foo_and_check(self):
signed_locator = self.keep_client.put('foo')
- self.assertRegexpMatches(
+ self.assertRegex(
signed_locator,
r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
'invalid locator from Keep.put("foo"): ' + signed_locator)
def test_KeepAuthenticatedSignedTest(self):
signed_locator = self._put_foo_and_check()
self.assertEqual(self.keep_client.get(signed_locator),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
def test_KeepAuthenticatedUnsignedTest(self):
signed_locator = self._put_foo_and_check()
self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
def test_KeepUnauthenticatedSignedTest(self):
signed_locator = self._put_foo_and_check()
self.keep_client.api_token = ''
self.assertEqual(self.keep_client.get(signed_locator),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
def test_KeepUnauthenticatedUnsignedTest(self):
signed_locator = self._put_foo_and_check()
self.keep_client.api_token = ''
self.assertEqual(self.keep_client.get("acbd18db4cc2f85cedef654fccc4a4d8"),
- 'foo',
+ b'foo',
'wrong content from Keep.get(md5("foo"))')
keep_client = arvados.KeepClient(api_client=self.api_client,
local_store='')
baz_locator = keep_client.put('baz')
- self.assertRegexpMatches(
+ self.assertRegex(
baz_locator,
'^73feffa4b7f6bb68e44cf984c85f6e88\+3',
'wrong md5 hash from Keep.put("baz"): ' + baz_locator)
self.assertEqual(keep_client.get(baz_locator),
- 'baz',
+ b'baz',
'wrong content from Keep.get(md5("baz"))')
self.assertTrue(keep_client.using_proxy)
keep_client = arvados.KeepClient(api_client=self.api_client,
proxy='', local_store='')
baz_locator = keep_client.put('baz2')
- self.assertRegexpMatches(
+ self.assertRegex(
baz_locator,
'^91f372a266fe2bf2823cb8ec7fda31ce\+4',
'wrong md5 hash from Keep.put("baz2"): ' + baz_locator)
self.assertEqual(keep_client.get(baz_locator),
- 'baz2',
+ b'baz2',
'wrong content from Keep.get(md5("baz2"))')
self.assertTrue(keep_client.using_proxy)
def get_service_roots(self, api_client):
keep_client = arvados.KeepClient(api_client=api_client)
services = keep_client.weighted_service_roots(arvados.KeepLocator('0'*32))
- return [urlparse.urlparse(url) for url in sorted(services)]
+ return [urllib.parse.urlparse(url) for url in sorted(services)]
def test_ssl_flag_respected_in_roots(self):
for ssl_flag in [False, True]:
with tutil.mock_keep_responses(force_timeout, 0) as mock:
keep_client = arvados.KeepClient(api_client=api_client)
with self.assertRaises(arvados.errors.KeepWriteError):
- keep_client.put('foo')
+ keep_client.put(b'foo')
self.assertEqual(
mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
num_retries=3)
self.assertEqual([403, 403], [
getattr(error, 'status_code', None)
- for error in err_check.exception.request_errors().itervalues()])
+ for error in err_check.exception.request_errors().values()])
def test_get_error_reflects_last_retry(self):
self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
self.assertEqual(0, len(exc_check.exception.request_errors()))
def test_oddball_service_get(self):
- body = 'oddball service get'
+ body = b'oddball service get'
api_client = self.mock_keep_services(service_type='fancynewblobstore')
with tutil.mock_keep_responses(body, 200):
keep_client = arvados.KeepClient(api_client=api_client)
self.assertEqual(body, actual)
def test_oddball_service_put(self):
- body = 'oddball service put'
+ body = b'oddball service put'
pdh = tutil.str_keep_locator(body)
api_client = self.mock_keep_services(service_type='fancynewblobstore')
with tutil.mock_keep_responses(pdh, 200):
self.assertEqual(pdh, actual)
def test_oddball_service_writer_count(self):
- body = 'oddball service writer count'
+ body = b'oddball service writer count'
pdh = tutil.str_keep_locator(body)
api_client = self.mock_keep_services(service_type='fancynewblobstore',
count=4)
list('9d81c02e76a3bf54'),
]
self.blocks = [
- "{:064x}".format(x)
+ "{:064x}".format(x).encode()
for x in range(len(self.expected_order))]
self.hashes = [
hashlib.md5(self.blocks[x]).hexdigest()
self.assertRaises(arvados.errors.KeepRequestError):
op(i)
got_order = [
- re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+ re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL).decode()).group(1)
for resp in mock.responses]
self.assertEqual(self.expected_order[i]*2, got_order)
self.assertRaises(arvados.errors.KeepWriteError):
self.keep_client.put(self.blocks[i], num_retries=2, copies=copies)
got_order = [
- re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL)).group(1)
+ re.search(r'//\[?keep0x([0-9a-f]+)', resp.getopt(pycurl.URL).decode()).group(1)
for resp in mock.responses]
# With T threads racing to make requests, the position
# of a given server in the sequence of HTTP requests
def test_probe_waste_adding_one_server(self):
hashes = [
- hashlib.md5("{:064x}".format(x)).hexdigest() for x in range(100)]
+ hashlib.md5("{:064x}".format(x).encode()).hexdigest() for x in range(100)]
initial_services = 12
self.api_client = self.mock_keep_services(count=initial_services)
self.keep_client = arvados.KeepClient(api_client=self.api_client)
max_penalty))
def check_64_zeros_error_order(self, verb, exc_class):
- data = '0' * 64
+ data = b'0' * 64
if verb == 'get':
data = tutil.str_keep_locator(data)
# Arbitrary port number:
keep_client = arvados.KeepClient(api_client=api_client)
with mock.patch('pycurl.Curl') as curl_mock, \
self.assertRaises(exc_class) as err_check:
- curl_mock.return_value.side_effect = socket.timeout
+ curl_mock.return_value = tutil.FakeCurl.make(code=500, body=b'')
getattr(keep_client, verb)(data)
- urls = [urlparse.urlparse(url)
+ urls = [urllib.parse.urlparse(url)
for url in err_check.exception.request_errors()]
self.assertEqual([('keep0x' + c, aport) for c in '3eab2d5fc9681074'],
[(url.hostname, url.port) for url in urls])
self.check_64_zeros_error_order('put', arvados.errors.KeepWriteError)
-class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
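+# The stub Keep server setup/teardown now comes from the
+# keepstub.StubKeepServers mixin instead of local setUp/tearDown methods.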
+class KeepClientTimeout(keepstub.StubKeepServers, unittest.TestCase):
# BANDWIDTH_LOW_LIM must be less than len(DATA) so we can transfer
# 1s worth of data and then trigger bandwidth errors before running
# out of data.
- DATA = 'x'*2**11
+ DATA = b'x'*2**11
BANDWIDTH_LOW_LIM = 1024
TIMEOUT_TIME = 1.0
delta = round(time.time() - self.t0, 3)
self.assertGreaterEqual(delta, self.tmin)
- def setUp(self):
- sock = socket.socket()
- sock.bind(('0.0.0.0', 0))
- self.port = sock.getsockname()[1]
- sock.close()
- self.server = keepstub.Server(('0.0.0.0', self.port), keepstub.Handler)
- self.thread = threading.Thread(target=self.server.serve_forever)
- self.thread.daemon = True # Exit thread if main proc exits
- self.thread.start()
- self.api_client = self.mock_keep_services(
- count=1,
- service_host='localhost',
- service_port=self.port,
- )
-
- def tearDown(self):
- self.server.shutdown()
-
def keepClient(self, timeouts=(0.1, TIMEOUT_TIME, BANDWIDTH_LOW_LIM)):
return arvados.KeepClient(
api_client=self.api_client,
code=200, body='foo', headers={'Content-Length': 3})
self.mock_disks_and_gateways()
locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@' + self.gateways[0]['uuid']
- self.assertEqual('foo', self.keepClient.get(locator))
+ self.assertEqual(b'foo', self.keepClient.get(locator))
self.assertEqual(self.gateway_roots[0]+locator,
- MockCurl.return_value.getopt(pycurl.URL))
+ MockCurl.return_value.getopt(pycurl.URL).decode())
self.assertEqual(True, self.keepClient.head(locator))
@mock.patch('pycurl.Curl')
# Gateways are tried first, in the order given.
for i, root in enumerate(self.gateway_roots):
self.assertEqual(root+locator,
- mocks[i].getopt(pycurl.URL))
+ mocks[i].getopt(pycurl.URL).decode())
# Disk services are tried next.
for i in range(gateways, gateways+disks):
- self.assertRegexpMatches(
- mocks[i].getopt(pycurl.URL),
+ self.assertRegex(
+ mocks[i].getopt(pycurl.URL).decode(),
r'keep0x')
@mock.patch('pycurl.Curl')
gateways = 4
disks = 3
mocks = [
- tutil.FakeCurl.make(code=404, body='')
+ tutil.FakeCurl.make(code=404, body=b'')
for _ in range(gateways+disks)
]
MockCurl.side_effect = tutil.queue_with(mocks)
# Gateways are tried first, in the order given.
for i, root in enumerate(self.gateway_roots):
self.assertEqual(root+locator,
- mocks[i].getopt(pycurl.URL))
+ mocks[i].getopt(pycurl.URL).decode())
# Disk services are tried next.
for i in range(gateways, gateways+disks):
- self.assertRegexpMatches(
- mocks[i].getopt(pycurl.URL),
+ self.assertRegex(
+ mocks[i].getopt(pycurl.URL).decode(),
r'keep0x')
@mock.patch('pycurl.Curl')
def test_get_with_remote_proxy_hint(self, MockCurl):
MockCurl.return_value = tutil.FakeCurl.make(
- code=200, body='foo', headers={'Content-Length': 3})
+ code=200, body=b'foo', headers={'Content-Length': 3})
self.mock_disks_and_gateways()
locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
- self.assertEqual('foo', self.keepClient.get(locator))
+ self.assertEqual(b'foo', self.keepClient.get(locator))
self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
- MockCurl.return_value.getopt(pycurl.URL))
+ MockCurl.return_value.getopt(pycurl.URL).decode())
@mock.patch('pycurl.Curl')
def test_head_with_remote_proxy_hint(self, MockCurl):
MockCurl.return_value = tutil.FakeCurl.make(
- code=200, body='foo', headers={'Content-Length': 3})
+ code=200, body=b'foo', headers={'Content-Length': 3})
self.mock_disks_and_gateways()
locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
self.assertEqual(True, self.keepClient.head(locator))
self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
- MockCurl.return_value.getopt(pycurl.URL))
+ MockCurl.return_value.getopt(pycurl.URL).decode())
class KeepClientRetryTestMixin(object):
# out appropriate methods in the client.
PROXY_ADDR = 'http://[%s]:65535/' % (tutil.TEST_HOST,)
- TEST_DATA = 'testdata'
+ TEST_DATA = b'testdata'
TEST_LOCATOR = 'ef654c40ab4f1747fc699915d4f70902+8'
def setUp(self):
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
+from builtins import next
+from builtins import zip
+from builtins import str
+from builtins import range
import datetime
import itertools
import random
def numstrs(fmtstr, base, exponent):
def genstrs(self, count=None):
return (fmtstr.format(random.randint(0, base ** exponent))
- for c in xrange(count or self.DEFAULT_TEST_COUNT))
+ for c in range(count or self.DEFAULT_TEST_COUNT))
return genstrs
checksums = numstrs('{:032x}', 16, 32)
def base_locators(self, count=DEFAULT_TEST_COUNT):
return ('+'.join(pair) for pair in
- itertools.izip(self.checksums(count), self.sizes(count)))
+ zip(self.checksums(count), self.sizes(count)))
def perm_hints(self, count=DEFAULT_TEST_COUNT):
- for sig, ts in itertools.izip(self.signatures(count),
+ for sig, ts in zip(self.signatures(count),
self.timestamps(count)):
yield 'A{}@{}'.format(sig, ts)
def test_good_locators_returned(self):
for hint_gens in [(), (self.sizes(),),
(self.sizes(), self.perm_hints())]:
- for loc_data in itertools.izip(self.checksums(), *hint_gens):
+ for loc_data in zip(self.checksums(), *hint_gens):
locator = '+'.join(loc_data)
self.assertEqual(locator, str(KeepLocator(locator)))
+from __future__ import absolute_import
# usage example:
#
# ARVADOS_API_TOKEN=abc ARVADOS_API_HOST=arvados.local python -m unittest discover
import unittest
import arvados
import apiclient
-import run_test_server
+from . import run_test_server
class PipelineTemplateTest(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
-#!/usr/bin/env python
-
+from builtins import zip
+from builtins import range
+from builtins import object
import itertools
import unittest
responses = itertools.chain(results, itertools.repeat(None))
retrier = arv_retry.RetryLoop(num_retries, self.loop_success,
**kwargs)
- for tries_left, response in itertools.izip(retrier, responses):
+ for tries_left, response in zip(retrier, responses):
retrier.save_result(response)
return retrier
check_is_not = check('assertIsNot')
def test_obvious_successes(self):
- self.check_is(True, *range(200, 207))
+ self.check_is(True, *list(range(200, 207)))
def test_obvious_stops(self):
self.check_is(False, 424, 426, 428, 431,
- *range(400, 408) + range(410, 420))
+ *list(range(400, 408)) + list(range(410, 420)))
def test_obvious_retries(self):
self.check_is(None, 500, 502, 503, 504)
self.check_is(None, 408, 409, 422, 423)
def test_5xx_failures(self):
- self.check_is(False, 501, *range(505, 512))
+ self.check_is(False, 501, *list(range(505, 512)))
def test_1xx_not_retried(self):
self.check_is_not(None, 100, 101)
def test_redirects_not_retried(self):
- self.check_is_not(None, *range(300, 309))
+ self.check_is_not(None, *list(range(300, 309)))
def test_wacky_code_retries(self):
self.check_is(None, 0, 99, 600, -200)
-#!/usr/bin/env python
-
+from __future__ import absolute_import
+from builtins import object
import mock
import os
import unittest
import hashlib
-import run_test_server
+from . import run_test_server
import json
import arvados
-import arvados_testutil as tutil
+from . import arvados_testutil as tutil
from apiclient import http as apiclient_http
-#!/usr/bin/env python
-
+from __future__ import absolute_import
+from builtins import object
import bz2
import gzip
import io
from arvados import StreamReader, StreamFileReader
from arvados._ranges import Range
-import arvados_testutil as tutil
-import run_test_server
+from . import arvados_testutil as tutil
+from . import run_test_server
class StreamFileReaderTestCase(unittest.TestCase):
def make_count_reader(self):
def test_read_block_crossing_behavior(self):
# read() calls will be aligned on block boundaries - see #3663.
sfile = self.make_count_reader()
- self.assertEqual('123', sfile.read(10))
+ self.assertEqual(b'123', sfile.read(10))
def test_small_read(self):
sfile = self.make_count_reader()
- self.assertEqual('12', sfile.read(2))
+ self.assertEqual(b'12', sfile.read(2))
def test_successive_reads(self):
sfile = self.make_count_reader()
- for expect in ['123', '456', '789', '']:
+ for expect in [b'123', b'456', b'789', b'']:
self.assertEqual(expect, sfile.read(10))
def test_readfrom_spans_blocks(self):
sfile = self.make_count_reader()
- self.assertEqual('6789', sfile.readfrom(5, 12))
+ self.assertEqual(b'6789', sfile.readfrom(5, 12))
def test_small_readfrom_spanning_blocks(self):
sfile = self.make_count_reader()
- self.assertEqual('2345', sfile.readfrom(1, 4))
+ self.assertEqual(b'2345', sfile.readfrom(1, 4))
def test_readall(self):
sfile = self.make_count_reader()
- self.assertEqual('123456789', ''.join(sfile.readall()))
+ self.assertEqual(b'123456789', b''.join(sfile.readall()))
def test_one_arg_seek(self):
self.test_absolute_seek([])
def test_absolute_seek(self, args=[os.SEEK_SET]):
sfile = self.make_count_reader()
sfile.seek(6, *args)
- self.assertEqual('78', sfile.read(2))
+ self.assertEqual(b'78', sfile.read(2))
sfile.seek(4, *args)
- self.assertEqual('56', sfile.read(2))
+ self.assertEqual(b'56', sfile.read(2))
def test_relative_seek(self, args=[os.SEEK_CUR]):
sfile = self.make_count_reader()
- self.assertEqual('12', sfile.read(2))
+ self.assertEqual(b'12', sfile.read(2))
sfile.seek(2, *args)
- self.assertEqual('56', sfile.read(2))
+ self.assertEqual(b'56', sfile.read(2))
def test_end_seek(self):
sfile = self.make_count_reader()
sfile.seek(-6, os.SEEK_END)
- self.assertEqual('45', sfile.read(2))
+ self.assertEqual(b'45', sfile.read(2))
def test_seek_min_zero(self):
sfile = self.make_count_reader()
def test_context(self):
with self.make_count_reader() as sfile:
self.assertFalse(sfile.closed, "reader is closed inside context")
- self.assertEqual('12', sfile.read(2))
+ self.assertEqual(b'12', sfile.read(2))
self.assertTrue(sfile.closed, "reader is open after context")
def make_newlines_reader(self):
self.check_decompressed_name('test.log.bz2', 'test.log')
def check_decompression(self, compress_ext, compress_func):
- test_text = 'decompression\ntest\n'
+ test_text = b'decompression\ntest\n'
test_data = compress_func(test_text)
stream = tutil.MockStreamReader('.', test_data)
reader = StreamFileReader(stream, [Range(0, 0, len(test_data))],
'test.' + compress_ext)
- self.assertEqual(test_text, ''.join(reader.readall_decompressed()))
+ self.assertEqual(test_text, b''.join(reader.readall_decompressed()))
@staticmethod
def gzip_compress(data):
reader = self.make_newlines_reader()
data = reader.readline()
self.assertEqual('one\n', data)
- self.assertEqual(''.join(['two\n', '\n', 'three\n', 'four\n', '\n']), ''.join(reader.readall()))
+ self.assertEqual(b''.join([b'two\n', b'\n', b'three\n', b'four\n', b'\n']), b''.join(reader.readall()))
class StreamRetryTestMixin(object):
def test_success_without_retries(self):
with tutil.mock_keep_responses('bar', 200):
reader = self.reader_for('bar_file')
- self.assertEqual('bar', self.read_for_test(reader, 3))
+ self.assertEqual(b'bar', self.read_for_test(reader, 3))
@tutil.skip_sleep
def test_read_no_default_retry(self):
def test_read_with_instance_retries(self):
with tutil.mock_keep_responses('foo', 500, 200):
reader = self.reader_for('foo_file', num_retries=3)
- self.assertEqual('foo', self.read_for_test(reader, 3))
+ self.assertEqual(b'foo', self.read_for_test(reader, 3))
@tutil.skip_sleep
def test_read_with_method_retries(self):
with tutil.mock_keep_responses('foo', 500, 200):
reader = self.reader_for('foo_file')
- self.assertEqual('foo',
+ self.assertEqual(b'foo',
self.read_for_test(reader, 3, num_retries=3))
@tutil.skip_sleep
class StreamFileReadAllTestCase(StreamFileReadTestCase):
def read_for_test(self, reader, byte_count, **kwargs):
- return ''.join(reader.readall(**kwargs))
+ return b''.join(reader.readall(**kwargs))
class StreamFileReadAllDecompressedTestCase(StreamFileReadTestCase):
def read_for_test(self, reader, byte_count, **kwargs):
- return ''.join(reader.readall_decompressed(**kwargs))
+ return b''.join(reader.readall_decompressed(**kwargs))
class StreamFileReadlinesTestCase(StreamFileReadTestCase):
def read_for_test(self, reader, byte_count, **kwargs):
- return ''.join(reader.readlines(**kwargs))
+ return ''.join(reader.readlines(**kwargs)).encode()
if __name__ == '__main__':
unittest.main()
def runTest(self):
arvados.util.mkdir_dash_p('./tmp/foo')
with open('./tmp/bar', 'wb') as f:
- f.write('bar')
+ f.write(b'bar')
self.assertRaises(OSError, arvados.util.mkdir_dash_p, './tmp/bar')
def test_success(self):
stdout, stderr = arvados.util.run_command(['echo', 'test'],
stderr=subprocess.PIPE)
- self.assertEqual("test\n", stdout)
- self.assertEqual("", stderr)
+ self.assertEqual("test\n".encode(), stdout)
+ self.assertEqual("".encode(), stderr)
def test_failure(self):
with self.assertRaises(arvados.errors.CommandFailedError):