X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7e814093e36dd6fc96353584c2c0797f4654f317..0eb72b526bf8bbb011551ecf019f604e17a534f1:/sdk/python/arvados/_ranges.py diff --git a/sdk/python/arvados/_ranges.py b/sdk/python/arvados/_ranges.py index 947b35fe0f..bb245ab2bf 100644 --- a/sdk/python/arvados/_ranges.py +++ b/sdk/python/arvados/_ranges.py @@ -1,8 +1,19 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import division +from builtins import object import logging _logger = logging.getLogger('arvados.ranges') +# Log level below 'debug' ! +RANGES_SPAM = 9 + class Range(object): + __slots__ = ("locator", "range_start", "range_size", "segment_offset") + def __init__(self, locator, range_start, range_size, segment_offset=0): self.locator = locator self.range_start = range_start @@ -18,15 +29,15 @@ class Range(object): self.range_size == other.range_size and self.segment_offset == other.segment_offset) -def first_block(data_locators, range_start, range_size): - block_start = 0L +def first_block(data_locators, range_start): + block_start = 0 # range_start/block_start is the inclusive lower bound # range_end/block_end is the exclusive upper bound hi = len(data_locators) lo = 0 - i = int((hi + lo) / 2) + i = (hi + lo) // 2 block_size = data_locators[i].range_size block_start = data_locators[i].range_start block_end = block_start + block_size @@ -42,7 +53,7 @@ def first_block(data_locators, range_start, range_size): lo = i else: hi = i - i = int((hi + lo) / 2) + i = (hi + lo) // 2 block_size = data_locators[i].range_size block_start = data_locators[i].range_start block_end = block_start + block_size @@ -50,6 +61,8 @@ def first_block(data_locators, range_start, range_size): return i class LocatorAndRange(object): + __slots__ = ("locator", "block_size", "segment_offset", "segment_size") + def __init__(self, locator, block_size, segment_offset, segment_size): self.locator = locator self.block_size = block_size @@ -65,7 +78,7 @@ class LocatorAndRange(object): def __repr__(self): return "LocatorAndRange(%r, %r, %r, %r)" % (self.locator, self.block_size, self.segment_offset, self.segment_size) -def locators_and_ranges(data_locators, range_start, range_size): +def locators_and_ranges(data_locators, range_start, range_size, limit=None): """Get blocks that are covered by a range. Returns a list of LocatorAndRange objects. @@ -79,24 +92,31 @@ def locators_and_ranges(data_locators, range_start, range_size): :range_size: size of range + :limit: + Maximum segments to return, default None (unlimited). Will truncate the + result if there are more segments needed to cover the range than the + limit. + """ if range_size == 0: return [] resp = [] range_end = range_start + range_size - i = first_block(data_locators, range_start, range_size) + i = first_block(data_locators, range_start) if i is None: return [] # We should always start at the first segment due to the binary # search. - while i < len(data_locators): + while i < len(data_locators) and len(resp) != limit: dl = data_locators[i] block_start = dl.range_start block_size = dl.range_size block_end = block_start + block_size - _logger.debug(dl.locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end) + _logger.log(RANGES_SPAM, + "L&R %s range_start %s block_start %s range_end %s block_end %s", + dl.locator, range_start, block_start, range_end, block_end) if range_end <= block_start: # range ends before this block starts, so don't look at any more locators break @@ -151,14 +171,14 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n last = data_locators[-1] if (last.range_start+last.range_size) == new_range_start: - if last.locator == new_locator: + if last.locator == new_locator and (last.segment_offset+last.range_size) == new_segment_offset: # extend last segment last.range_size += new_range_size else: data_locators.append(Range(new_locator, new_range_start, new_range_size, new_segment_offset)) return - i = first_block(data_locators, new_range_start, new_range_size) + i = first_block(data_locators, new_range_start) if i is None: return @@ -168,12 +188,15 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n dl = data_locators[i] old_segment_start = dl.range_start old_segment_end = old_segment_start + dl.range_size - _logger.debug(dl, "range_start", new_range_start, "segment_start", old_segment_start, "range_end", new_range_end, "segment_end", old_segment_end) + _logger.log(RANGES_SPAM, + "RR %s range_start %s segment_start %s range_end %s segment_end %s", + dl, new_range_start, old_segment_start, new_range_end, + old_segment_end) if new_range_end <= old_segment_start: # range ends before this segment starts, so don't look at any more locators break - if old_segment_start <= new_range_start and new_range_end <= old_segment_end: + if old_segment_start <= new_range_start and new_range_end <= old_segment_end: # new range starts and ends in old segment # split segment into up to 3 pieces if (new_range_start-old_segment_start) > 0: