Merge branch '8555-s3-trash'
[arvados.git] / services / datamanager / experimental / datamanager.py
index 00cc554a07a71abdc387156933b2c2478e42183d..8207bdcd5cb77b232b5fa582d9b487f08a98251b 100755 (executable)
@@ -29,7 +29,7 @@ def fileSizeFormat(value):
                          byteunits[exponent])
 
 def percentageFloor(x):
-""" Returns a float which is the input rounded down to the neared 0.01.
+  """ Returns a float which is the input rounded down to the nearest 0.01.
 
 e.g. percentageFloor(0.941354) = 0.94
 """
@@ -358,9 +358,12 @@ def getKeepBlocks(keep_servers):
   blocks = []
   for host,port in keep_servers:
     response = urllib2.urlopen('http://%s:%d/index' % (host, port))
-    blocks.append([line.split(' ')
-                   for line in response.read().split('\n')
-                   if line])
+    server_blocks = [line.split(' ')
+                     for line in response.read().split('\n')
+                     if line]
+    server_blocks = [(block_id, int(mtime))
+                     for block_id, mtime in server_blocks]
+    blocks.append(server_blocks)
   return blocks
 
 def getKeepStats(keep_servers):
@@ -429,14 +432,15 @@ def outputGarbageCollectionReport(filename):
     for line in garbage_collection_report:
       gcwriter.writerow(line)
 
-
 def computeGarbageCollectionHistogram():
+  # TODO(misha): Modify this to allow users to specify the number of
+  # histogram buckets through a flag.
   histogram = []
   last_percentage = -1
   for _,mtime,_,_,disk_free in garbage_collection_report:
     curr_percentage = percentageFloor(disk_free)
     if curr_percentage > last_percentage:
-      histogram.append( (curr_percentage, mtime) )
+      histogram.append( (mtime, curr_percentage) )
     last_percentage = curr_percentage
 
   log.info('Garbage collection histogram is: %s', histogram)
@@ -444,6 +448,19 @@ def computeGarbageCollectionHistogram():
   return histogram
 
 
+def logGarbageCollectionHistogram():
+  body = {}
+  # TODO(misha): Decide whether we should specify an object_uuid in
+  # the body and if so, which uuid to use.
+  body['event_type'] = args.block_age_free_space_histogram_log_event_type
+  properties = {}
+  properties['histogram'] = garbage_collection_histogram
+  body['properties'] = properties
+  # TODO(misha): Confirm that this will throw an exception if it
+  # fails to create the log entry.
+  arv.logs().create(body=body).execute()
+
+
 def detectReplicationProblems():
   blocks_not_in_any_collections.update(
     set(block_to_replication.keys()).difference(block_to_collections.keys()))
@@ -524,6 +541,10 @@ parser.add_argument('--user-storage-log-event-type',
                     default='user-storage-report',
                     help=('The event type to set when logging user '
                           'storage usage to workbench.'))
+parser.add_argument('--block-age-free-space-histogram-log-event-type',
+                    default='block-age-free-space-histogram',
+                    help=('The event type to set when logging the block '
+                          'age vs. free space histogram to workbench.'))
 parser.add_argument('--garbage-collection-file',
                     default='',
                     help=('The file to write a garbage collection report, or '
@@ -588,12 +609,11 @@ cumulative disk size) / total disk capacity
 garbage_collection_histogram = []
 """ Shows the tradeoff of keep block age vs keep disk free space.
 
-Each entry is of the form (Disk Proportion, mtime).
+Each entry is of the form (mtime, Disk Proportion).
 
-An entry of the form (0.52, 1388747781) means that if we deleted the
-olded non-presisted blocks until we had 52% of the disk free, the
-oldest non-persisted block we'd have left would have an mtime of
-1388747781.
+An entry of the form (1388747781, 0.52) means that if we deleted the
+oldest non-persisted blocks until we had 52% of the disk free, then
+all blocks with an mtime greater than 1388747781 would be preserved.
 """
 
 # Stuff to report on
@@ -667,8 +687,12 @@ def loadAllData():
              args.garbage_collection_file)
     outputGarbageCollectionReport(args.garbage_collection_file)
 
+  global garbage_collection_histogram
   garbage_collection_histogram = computeGarbageCollectionHistogram()
 
+  if args.log_to_workbench:
+    logGarbageCollectionHistogram()
+
   detectReplicationProblems()
 
   computeUserStorageUsage()