byteunits[exponent])
def percentageFloor(x):
-""" Returns a float which is the input rounded down to the neared 0.01.
+ """ Returns a float which is the input rounded down to the nearest 0.01.
e.g. percentageFloor(0.941354) = 0.94
"""
blocks = []
for host,port in keep_servers:
response = urllib2.urlopen('http://%s:%d/index' % (host, port))
- blocks.append([line.split(' ')
- for line in response.read().split('\n')
- if line])
+ server_blocks = [line.split(' ')
+ for line in response.read().split('\n')
+ if line]
+ server_blocks = [(block_id, int(mtime))
+ for block_id, mtime in server_blocks]
+ blocks.append(server_blocks)
return blocks
def getKeepStats(keep_servers):
for line in garbage_collection_report:
gcwriter.writerow(line)
-
def computeGarbageCollectionHistogram():
    """Build the block-age vs. free-disk-space histogram.

    Walks garbage_collection_report in order, floors each row's disk-free
    value with percentageFloor, and records an entry whenever the floored
    value is greater than the previous row's floored value.

    Returns:
      A list of (mtime, floored disk-free proportion) tuples, matching the
      (mtime, Disk Proportion) layout documented for
      garbage_collection_histogram.
    """
    # TODO(misha): Modify this to allow users to specify the number of
    # histogram buckets through a flag.
    histogram = []
    last_percentage = -1
    for _, mtime, _, _, disk_free in garbage_collection_report:
        curr_percentage = percentageFloor(disk_free)
        if curr_percentage > last_percentage:
            histogram.append((mtime, curr_percentage))
        # NOTE(review): the nesting of this assignment could not be recovered
        # from the flattened source. At loop level and inside the `if` behave
        # the same as long as disk_free never decreases across the report --
        # confirm against the original file.
        last_percentage = curr_percentage

    log.info('Garbage collection histogram is: %s', histogram)
    return histogram
def logGarbageCollectionHistogram():
    """Create a log entry carrying the garbage collection histogram.

    The entry's event_type comes from
    args.block_age_free_space_histogram_log_event_type and its properties
    hold the module-level garbage_collection_histogram under the
    'histogram' key. The entry is submitted through arv.logs().create().
    """
    # TODO(misha): Decide whether we should specify an object_uuid in
    # the body and if so, which uuid to use.
    log_entry = {
        'event_type': args.block_age_free_space_histogram_log_event_type,
        'properties': {'histogram': garbage_collection_histogram},
    }
    # TODO(misha): Confirm that this will throw an exception if it
    # fails to create the log entry.
    arv.logs().create(body=log_entry).execute()
+
+
def detectReplicationProblems():
blocks_not_in_any_collections.update(
set(block_to_replication.keys()).difference(block_to_collections.keys()))
default='user-storage-report',
help=('The event type to set when logging user '
'storage usage to workbench.'))
# Event type used for the log entry that carries the block age vs. free
# space histogram (read as args.block_age_free_space_histogram_log_event_type).
parser.add_argument('--block-age-free-space-histogram-log-event-type',
                    default='block-age-free-space-histogram',
                    help=('The event type to set when logging the block '
                          'age vs. free space histogram to workbench.'))
parser.add_argument('--garbage-collection-file',
default='',
help=('The file to write a garbage collection report, or '
garbage_collection_histogram = []
""" Shows the tradeoff of keep block age vs keep disk free space.
-Each entry is of the form (Disk Proportion, mtime).
+Each entry is of the form (mtime, Disk Proportion).
-An entry of the form (0.52, 1388747781) means that if we deleted the
-olded non-presisted blocks until we had 52% of the disk free, the
-oldest non-persisted block we'd have left would have an mtime of
-1388747781.
+An entry of the form (1388747781, 0.52) means that if we deleted the
+oldest non-persisted blocks until we had 52% of the disk free, then
+all blocks with an mtime greater than 1388747781 would be preserved.
"""
# Stuff to report on
args.garbage_collection_file)
outputGarbageCollectionReport(args.garbage_collection_file)
+ global garbage_collection_histogram
garbage_collection_histogram = computeGarbageCollectionHistogram()
+ if args.log_to_workbench:
+ logGarbageCollectionHistogram()
+
detectReplicationProblems()
computeUserStorageUsage()