X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/aade22d7587e9f5921edb18ad76792a60eaa92fb..b9ad383c4e27d4e3c1945e14ba51fffdd61fdb36:/services/datamanager/experimental/datamanager.py diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py index 00cc554a07..8207bdcd5c 100755 --- a/services/datamanager/experimental/datamanager.py +++ b/services/datamanager/experimental/datamanager.py @@ -29,7 +29,7 @@ def fileSizeFormat(value): byteunits[exponent]) def percentageFloor(x): -""" Returns a float which is the input rounded down to the neared 0.01. + """ Returns a float which is the input rounded down to the nearest 0.01. e.g. precentageFloor(0.941354) = 0.94 """ @@ -358,9 +358,12 @@ def getKeepBlocks(keep_servers): blocks = [] for host,port in keep_servers: response = urllib2.urlopen('http://%s:%d/index' % (host, port)) - blocks.append([line.split(' ') - for line in response.read().split('\n') - if line]) + server_blocks = [line.split(' ') + for line in response.read().split('\n') + if line] + server_blocks = [(block_id, int(mtime)) + for block_id, mtime in server_blocks] + blocks.append(server_blocks) return blocks def getKeepStats(keep_servers): @@ -429,14 +432,15 @@ def outputGarbageCollectionReport(filename): for line in garbage_collection_report: gcwriter.writerow(line) - def computeGarbageCollectionHistogram(): + # TODO(misha): Modify this to allow users to specify the number of + # histogram buckets through a flag. 
histogram = [] last_percentage = -1 for _,mtime,_,_,disk_free in garbage_collection_report: curr_percentage = percentageFloor(disk_free) if curr_percentage > last_percentage: - histogram.append( (curr_percentage, mtime) ) + histogram.append( (mtime, curr_percentage) ) last_percentage = curr_percentage log.info('Garbage collection histogram is: %s', histogram) @@ -444,6 +448,19 @@ def computeGarbageCollectionHistogram(): return histogram +def logGarbageCollectionHistogram(): + body = {} + # TODO(misha): Decide whether we should specify an object_uuid in + # the body and if so, which uuid to use. + body['event_type'] = args.block_age_free_space_histogram_log_event_type + properties = {} + properties['histogram'] = garbage_collection_histogram + body['properties'] = properties + # TODO(misha): Confirm that this will throw an exception if it + # fails to create the log entry. + arv.logs().create(body=body).execute() + + def detectReplicationProblems(): blocks_not_in_any_collections.update( set(block_to_replication.keys()).difference(block_to_collections.keys())) @@ -524,6 +541,10 @@ parser.add_argument('--user-storage-log-event-type', default='user-storage-report', help=('The event type to set when logging user ' 'storage usage to workbench.')) +parser.add_argument('--block-age-free-space-histogram-log-event-type', + default='block-age-free-space-histogram', + help=('The event type to set when logging user ' + 'storage usage to workbench.')) parser.add_argument('--garbage-collection-file', default='', help=('The file to write a garbage collection report, or ' @@ -588,12 +609,11 @@ cumulative disk size) / total disk capacity garbage_collection_histogram = [] """ Shows the tradeoff of keep block age vs keep disk free space. -Each entry is of the form (Disk Proportion, mtime). +Each entry is of the form (mtime, Disk Proportion). 
-An entry of the form (0.52, 1388747781) means that if we deleted the -olded non-presisted blocks until we had 52% of the disk free, the -oldest non-persisted block we'd have left would have an mtime of -1388747781. +An entry of the form (1388747781, 0.52) means that if we deleted the +oldest non-persisted blocks until we had 52% of the disk free, then +all blocks with an mtime greater than 1388747781 would be preserved. """ # Stuff to report on @@ -667,8 +687,12 @@ def loadAllData(): args.garbage_collection_file) outputGarbageCollectionReport(args.garbage_collection_file) + global garbage_collection_histogram garbage_collection_histogram = computeGarbageCollectionHistogram() + if args.log_to_workbench: + logGarbageCollectionHistogram() + detectReplicationProblems() computeUserStorageUsage()