X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b0e2fe9d0a18d5e809bc8c0d3382e0e023cb949c..22286e8b81fa7644500e197b95e6d6417ed25f7e:/sdk/python/arvados/ranges.py diff --git a/sdk/python/arvados/ranges.py b/sdk/python/arvados/ranges.py index fe9c04b451..8c377e26e7 100644 --- a/sdk/python/arvados/ranges.py +++ b/sdk/python/arvados/ranges.py @@ -1,7 +1,12 @@ -LOCATOR = 0 -BLOCKSIZE = 1 -OFFSET = 2 -SEGMENTSIZE = 3 +class Range(object): + def __init__(self, locator, range_start, range_size, segment_offset=0): + self.locator = locator + self.range_start = range_start + self.range_size = range_size + self.segment_offset = segment_offset + + def __repr__(self): + return "[\"%s\", %i, %i, %i]" % (self.locator, self.range_start, self.range_size, self.segment_offset) def first_block(data_locators, range_start, range_size, debug=False): block_start = 0L @@ -12,8 +17,8 @@ def first_block(data_locators, range_start, range_size, debug=False): hi = len(data_locators) lo = 0 i = int((hi + lo) / 2) - block_size = data_locators[i][BLOCKSIZE] - block_start = data_locators[i][OFFSET] + block_size = data_locators[i].range_size + block_start = data_locators[i].range_start block_end = block_start + block_size if debug: print '---' @@ -30,19 +35,35 @@ def first_block(data_locators, range_start, range_size, debug=False): hi = i i = int((hi + lo) / 2) if debug: print lo, i, hi - block_size = data_locators[i][BLOCKSIZE] - block_start = data_locators[i][OFFSET] + block_size = data_locators[i].range_size + block_start = data_locators[i].range_start block_end = block_start + block_size return i +class LocatorAndRange(object): + def __init__(self, locator, block_size, segment_offset, segment_size): + self.locator = locator + self.block_size = block_size + self.segment_offset = segment_offset + self.segment_size = segment_size + + def __eq__(self, other): + return (self.locator == other.locator and + self.block_size == other.block_size and + self.segment_offset == other.segment_offset and + self.segment_size == other.segment_size) + + def __repr__(self): + return "[\"%s\", %i, %i, %i]" % (self.locator, self.block_size, self.segment_offset, self.segment_size) + def locators_and_ranges(data_locators, range_start, range_size, debug=False): ''' Get blocks that are covered by the range - data_locators: list of [locator, block_size, block_start], assumes that blocks are in order and contigous + data_locators: list of Range objects, assumes that blocks are in order and contigous range_start: start of range range_size: size of range - returns list of [block locator, blocksize, segment offset, segment size] that satisfies the range + returns list of LocatorAndRange objects ''' if range_size == 0: return [] @@ -56,10 +77,12 @@ def locators_and_ranges(data_locators, range_start, range_size, debug=False): return [] while i < len(data_locators): - locator, block_size, block_start = data_locators[i] + dl = data_locators[i] + block_start = dl.range_start + block_size = dl.range_size block_end = block_start + block_size if debug: - print locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end + print dl.locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end if range_end <= block_start: # range ends before this block starts, so don't look at any more locators break @@ -71,79 +94,94 @@ def locators_and_ranges(data_locators, range_start, range_size, debug=False): if range_start >= block_start and range_end <= block_end: # range starts and ends in this block - resp.append([locator, block_size, range_start - block_start, range_size]) + resp.append(LocatorAndRange(dl.locator, block_size, dl.segment_offset + (range_start - block_start), range_size)) elif range_start >= block_start and range_end > block_end: # range starts in this block - resp.append([locator, block_size, range_start - block_start, block_end - range_start]) + resp.append(LocatorAndRange(dl.locator, block_size, dl.segment_offset + (range_start - block_start), block_end - range_start)) elif range_start < block_start and range_end > block_end: # range starts in a previous block and extends to further blocks - resp.append([locator, block_size, 0L, block_size]) + resp.append(LocatorAndRange(dl.locator, block_size, dl.segment_offset, block_size)) elif range_start < block_start and range_end <= block_end: # range starts in a previous block and ends in this block - resp.append([locator, block_size, 0L, range_end - block_start]) + resp.append(LocatorAndRange(dl.locator, block_size, dl.segment_offset, range_end - block_start)) block_start = block_end i += 1 return resp -def replace_range(data_locators, range_start, range_size, new_locator, debug=False): +def replace_range(data_locators, new_range_start, new_range_size, new_locator, new_segment_offset, debug=False): ''' - Replace a range with a new block. - data_locators: list of [locator, block_size, block_start], assumes that blocks are in order and contigous + Replace a file segment range with a new segment. + data_locators: list of Range objects, assumes that segments are in order and contigous range_start: start of range range_size: size of range - new_locator: locator for new block to be inserted + new_locator: locator for new segment to be inserted !!! data_locators will be updated in place !!! ''' - if range_size == 0: + if new_range_size == 0: return - range_start = long(range_start) - range_size = long(range_size) - range_end = range_start + range_size + new_range_start = long(new_range_start) + new_range_size = long(new_range_size) + new_range_end = new_range_start + new_range_size + + if len(data_locators) == 0: + data_locators.append(Range(new_locator, new_range_start, new_range_size, new_segment_offset)) + return last = data_locators[-1] - if (last[OFFSET]+last[BLOCKSIZE]) == range_start: - # append new block - data_locators.append([new_locator, range_size, range_start]) + if (last.range_start+last.range_size) == new_range_start: + if last.locator == new_locator: + # extend last segment + last.range_size += new_range_size + else: + data_locators.append(Range(new_locator, new_range_start, new_range_size, new_segment_offset)) return - i = first_block(data_locators, range_start, range_size, debug) + i = first_block(data_locators, new_range_start, new_range_size, debug) if i is None: return while i < len(data_locators): - locator, block_size, block_start = data_locators[i] - block_end = block_start + block_size + dl = data_locators[i] + old_segment_start = dl.range_start + old_segment_end = old_segment_start + dl.range_size if debug: - print locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end - if range_end <= block_start: - # range ends before this block starts, so don't look at any more locators + print locator, "range_start", new_range_start, "segment_start", old_segment_start, "range_end", new_range_end, "segment_end", old_segment_end + if new_range_end <= old_segment_start: + # range ends before this segment starts, so don't look at any more locators break - #if range_start >= block_end: - # range starts after this block ends, so go to next block - # we should always start at the first block due to the binary above, so this test is redundant + #if range_start >= old_segment_end: + # range starts after this segment ends, so go to next segment + # we should always start at the first segment due to the binary above, so this test is redundant #next - if range_start >= block_start and range_end <= block_end: - # range starts and ends in this block - # split block into 3 pieces - #resp.append([locator, block_size, range_start - block_start, range_size]) - pass - elif range_start >= block_start and range_end > block_end: - # range starts in this block - # split block into 2 pieces - #resp.append([locator, block_size, range_start - block_start, block_end - range_start]) - pass - elif range_start < block_start and range_end > block_end: - # range starts in a previous block and extends to further blocks - # zero out this block - #resp.append([locator, block_size, 0L, block_size]) - pass - elif range_start < block_start and range_end <= block_end: - # range starts in a previous block and ends in this block - # split into 2 pieces - #resp.append([locator, block_size, 0L, range_end - block_start]) - pass - block_start = block_end + if old_segment_start <= new_range_start and new_range_end <= old_segment_end: + # new range starts and ends in old segment + # split segment into up to 3 pieces + if (new_range_start-old_segment_start) > 0: + data_locators[i] = Range(dl.locator, old_segment_start, (new_range_start-old_segment_start), dl.segment_offset) + data_locators.insert(i+1, Range(new_locator, new_range_start, new_range_size, new_segment_offset)) + else: + data_locators[i] = Range(new_locator, new_range_start, new_range_size, new_segment_offset) + i -= 1 + if (old_segment_end-new_range_end) > 0: + data_locators.insert(i+2, Range(dl.locator, new_range_end, (old_segment_end-new_range_end), dl.segment_offset + (new_range_start-old_segment_start) + new_range_size)) + return + elif old_segment_start <= new_range_start and new_range_end > old_segment_end: + # range starts in this segment + # split segment into 2 pieces + data_locators[i] = Range(dl.locator, old_segment_start, (new_range_start-old_segment_start), dl.segment_offset) + data_locators.insert(i+1, Range(new_locator, new_range_start, new_range_size, new_segment_offset)) + i += 1 + elif new_range_start < old_segment_start and new_range_end >= old_segment_end: + # range starts in a previous segment and extends to further segments + # delete this segment + del data_locators[i] + i -= 1 + elif new_range_start < old_segment_start and new_range_end < old_segment_end: + # range starts in a previous segment and ends in this segment + # move the starting point of this segment up, and shrink it. + data_locators[i] = Range(dl.locator, new_range_end, (old_segment_end-new_range_end), dl.segment_offset + (new_range_end-old_segment_start)) + return i += 1