X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ad69b48e324c3ce29a4d2c84732dfd3d0288ebb3..58a026e09bda4c1e2374347615c325007c64fac4:/sdk/python/arvados/_ranges.py diff --git a/sdk/python/arvados/_ranges.py b/sdk/python/arvados/_ranges.py index 15f01735e0..bb245ab2bf 100644 --- a/sdk/python/arvados/_ranges.py +++ b/sdk/python/arvados/_ranges.py @@ -1,8 +1,19 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import division +from builtins import object import logging _logger = logging.getLogger('arvados.ranges') +# Log level below 'debug' ! +RANGES_SPAM = 9 + class Range(object): + __slots__ = ("locator", "range_start", "range_size", "segment_offset") + def __init__(self, locator, range_start, range_size, segment_offset=0): self.locator = locator self.range_start = range_start @@ -10,7 +21,7 @@ class Range(object): self.segment_offset = segment_offset def __repr__(self): - return "Range(\"%s\", %i, %i, %i)" % (self.locator, self.range_start, self.range_size, self.segment_offset) + return "Range(%r, %r, %r, %r)" % (self.locator, self.range_start, self.range_size, self.segment_offset) def __eq__(self, other): return (self.locator == other.locator and @@ -18,21 +29,21 @@ class Range(object): self.range_size == other.range_size and self.segment_offset == other.segment_offset) -def first_block(data_locators, range_start, range_size): - block_start = 0L +def first_block(data_locators, range_start): + block_start = 0 # range_start/block_start is the inclusive lower bound # range_end/block_end is the exclusive upper bound hi = len(data_locators) lo = 0 - i = int((hi + lo) / 2) + i = (hi + lo) // 2 block_size = data_locators[i].range_size block_start = data_locators[i].range_start block_end = block_start + block_size # perform a binary search for the first block - # assumes that all of the blocks are contigious, so range_start is guaranteed + # assumes that all of the blocks are contiguous, so range_start is guaranteed # to either fall into the range of a block or be outside the block range entirely while not (range_start >= block_start and range_start < block_end): if lo == i: @@ -42,7 +53,7 @@ def first_block(data_locators, range_start, range_size): lo = i else: hi = i - i = int((hi + lo) / 2) + i = (hi + lo) // 2 block_size = data_locators[i].range_size block_start = data_locators[i].range_start block_end = block_start + block_size @@ -50,6 +61,8 @@ def first_block(data_locators, range_start, range_size): return i class LocatorAndRange(object): + __slots__ = ("locator", "block_size", "segment_offset", "segment_size") + def __init__(self, locator, block_size, segment_offset, segment_size): self.locator = locator self.block_size = block_size @@ -63,14 +76,15 @@ class LocatorAndRange(object): self.segment_size == other.segment_size) def __repr__(self): - return "LocatorAndRange(\"%s\", %i, %i, %i)" % (self.locator, self.block_size, self.segment_offset, self.segment_size) + return "LocatorAndRange(%r, %r, %r, %r)" % (self.locator, self.block_size, self.segment_offset, self.segment_size) + +def locators_and_ranges(data_locators, range_start, range_size, limit=None): + """Get blocks that are covered by a range. -def locators_and_ranges(data_locators, range_start, range_size): - """Get blocks that are covered by the range and return list of LocatorAndRange - objects. + Returns a list of LocatorAndRange objects. :data_locators: - list of Range objects, assumes that blocks are in order and contigous + list of Range objects, assumes that blocks are in order and contiguous :range_start: start of range @@ -78,35 +92,35 @@ def locators_and_ranges(data_locators, range_start, range_size): :range_size: size of range + :limit: + Maximum segments to return, default None (unlimited). Will truncate the + result if there are more segments needed to cover the range than the + limit. + """ if range_size == 0: return [] resp = [] - range_start = range_start - range_size = range_size range_end = range_start + range_size - i = first_block(data_locators, range_start, range_size) + i = first_block(data_locators, range_start) if i is None: return [] - while i < len(data_locators): + # We should always start at the first segment due to the binary + # search. + while i < len(data_locators) and len(resp) != limit: dl = data_locators[i] block_start = dl.range_start block_size = dl.range_size block_end = block_start + block_size - _logger.debug(dl.locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end) + _logger.log(RANGES_SPAM, + "L&R %s range_start %s block_start %s range_end %s block_end %s", + dl.locator, range_start, block_start, range_end, block_end) if range_end <= block_start: # range ends before this block starts, so don't look at any more locators break - #if range_start >= block_end: - # Range starts after this block ends, so go to next block. - # We should always start at the first block due to the binary - # search above, so this test is unnecessary but useful to help - # document the algorithm. - #next - if range_start >= block_start and range_end <= block_end: # range starts and ends in this block resp.append(LocatorAndRange(dl.locator, block_size, dl.segment_offset + (range_start - block_start), range_size)) @@ -131,7 +145,7 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n data_locators will be updated in place :data_locators: - list of Range objects, assumes that segments are in order and contigous + list of Range objects, assumes that segments are in order and contiguous :new_range_start: start of range to replace in data_locators @@ -149,8 +163,6 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n if new_range_size == 0: return - new_range_start = new_range_start - new_range_size = new_range_size new_range_end = new_range_start + new_range_size if len(data_locators) == 0: @@ -159,34 +171,32 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n last = data_locators[-1] if (last.range_start+last.range_size) == new_range_start: - if last.locator == new_locator: + if last.locator == new_locator and (last.segment_offset+last.range_size) == new_segment_offset: # extend last segment last.range_size += new_range_size else: data_locators.append(Range(new_locator, new_range_start, new_range_size, new_segment_offset)) return - i = first_block(data_locators, new_range_start, new_range_size) + i = first_block(data_locators, new_range_start) if i is None: return + # We should always start at the first segment due to the binary + # search. while i < len(data_locators): dl = data_locators[i] old_segment_start = dl.range_start old_segment_end = old_segment_start + dl.range_size - _logger.debug(dl, "range_start", new_range_start, "segment_start", old_segment_start, "range_end", new_range_end, "segment_end", old_segment_end) + _logger.log(RANGES_SPAM, + "RR %s range_start %s segment_start %s range_end %s segment_end %s", + dl, new_range_start, old_segment_start, new_range_end, + old_segment_end) if new_range_end <= old_segment_start: # range ends before this segment starts, so don't look at any more locators break - #if range_start >= old_segment_end: - # Range starts after this segment ends, so go to next segment. - # We should always start at the first segment due to the binary - # search above, so this test is unnecessary but useful to help - # document the algorithm. - #next - - if old_segment_start <= new_range_start and new_range_end <= old_segment_end: + if old_segment_start <= new_range_start and new_range_end <= old_segment_end: # new range starts and ends in old segment # split segment into up to 3 pieces if (new_range_start-old_segment_start) > 0: