2798: Added comments to Python shuffled_service_roots and ported function to Go
authorPeter Amstutz <peter.amstutz@curoverse.com>
Sat, 10 May 2014 21:07:59 +0000 (17:07 -0400)
committerPeter Amstutz <peter.amstutz@curoverse.com>
Sat, 10 May 2014 21:07:59 +0000 (17:07 -0400)
sdk/go/src/arvados.org/keepclient/keepclient.go
sdk/python/arvados/keep.py

index 2bb5ff3eb19e163c4e1a52fca1c79507a2189f4b..6ca0eea908c89857b9010192481cff5987a34de8 100644 (file)
@@ -6,6 +6,7 @@ import (
        "fmt"
        "net/http"
        "sort"
+       "strconv"
 )
 
 type KeepDisk struct {
@@ -53,10 +54,49 @@ func KeepDisks() (service_roots []string, err error) {
        return service_roots, nil
 }
 
-/*
-func ProbeSequence(service_roots []string) (pseq []string) {
-       pseq = make([]string, 0, len(disks))
-       pool := disks[:]
+func ShuffledServiceRoots(service_roots []string, hash string) (pseq []string) {
+       // Build an ordering with which to query the Keep servers based on the
+       // contents of the hash.  "hash" is a hex-encoded number at least 8
+       // digits (32 bits) long
 
+       // seed used to calculate the next keep server from 'pool' to be added
+       // to 'pseq'
+       seed := hash
+
+       // Keep servers still to be added to the ordering
+       pool := service_roots[:]
+
+       // output probe sequence
+       pseq = make([]string, 0, len(service_roots))
+
+       // iterate while there are servers left to be assigned
+       for len(pool) > 0 {
+
+               if len(seed) < 8 {
+                       // ran out of digits in the seed
+                       if len(pseq) < (len(hash) / 4) {
+                               // the number of servers added to the probe
+                               // sequence is less than the number of 4-digit
+                               // slices in 'hash' so refill the seed with the
+                               // last 4 digits.
+                               seed = hash[len(hash)-4:]
+                       }
+                       seed += hash
+               }
+
+               // Take the next 8 digits (32 bytes) and interpret as an integer,
+               // then modulus with the size of the remaining pool to get the next
+               // selected server.
+               probe, _ := strconv.ParseInt(seed[0:8], 16, 32)
+               probe %= int64(len(pool))
+
+               // Append the selected server to the probe sequence and remove it
+               // from the pool.
+               pseq = append(pseq, pool[probe])
+               pool = append(pool[:probe], pool[probe+1:]...)
+
+               // Remove the digits just used from the seed
+               seed = seed[8:]
+       }
+       return pseq
 }
-*/
index 88487ae96e672726cfa5dbb3142dcfdeafcbec94..fcb59ec805680167a9de22caf4d6dc27d30caa92 100644 (file)
@@ -159,18 +159,46 @@ class KeepClient(object):
             finally:
                 self.lock.release()
 
+        # Build an ordering with which to query the Keep servers based on the
+        # contents of the hash.
+        # "hash" is a hex-encoded number at least 8 digits
+        # (32 bits) long
+
+        # seed used to calculate the next keep server from 'pool'
+        # to be added to 'pseq'
         seed = hash
+
+        # Keep servers still to be added to the ordering
         pool = self.service_roots[:]
+
+        # output probe sequence
         pseq = []
+
+        # iterate while there are servers left to be assigned
         while len(pool) > 0:
             if len(seed) < 8:
-                if len(pseq) < len(hash) / 4: # first time around
+                # ran out of digits in the seed
+                if len(pseq) < len(hash) / 4:
+                    # the number of servers added to the probe sequence is less
+                    # than the number of 4-digit slices in 'hash' so refill the
+                    # seed with the last 4 digits and then append the contents
+                    # of 'hash'.
                     seed = hash[-4:] + hash
                 else:
+                    # refill the seed with the contents of 'hash'
                     seed += hash
+
+            # Take the next 8 digits (32 bytes) and interpret as an integer,
+            # then modulus with the size of the remaining pool to get the next
+            # selected server.
             probe = int(seed[0:8], 16) % len(pool)
+
+            # Append the selected server to the probe sequence and remove it
+            # from the pool.
             pseq += [pool[probe]]
             pool = pool[:probe] + pool[probe+1:]
+
+            # Remove the digits just used from the seed
             seed = seed[8:]
         logging.debug(str(pseq))
         return pseq
@@ -208,7 +236,7 @@ class KeepClient(object):
             self._cache_lock.release()
 
     def reserve_cache(self, locator):
-        '''Reserve a cache slot for the specified locator, 
+        '''Reserve a cache slot for the specified locator,
         or return the existing slot.'''
         self._cache_lock.acquire()
         try:
@@ -281,8 +309,8 @@ class KeepClient(object):
             with timer.Timer() as t:
                 resp, content = h.request(url.encode('utf-8'), 'GET',
                                           headers=headers)
-            logging.info("Received %s bytes in %s msec (%s MiB/sec)" % (len(content), 
-                                                                        t.msecs, 
+            logging.info("Received %s bytes in %s msec (%s MiB/sec)" % (len(content),
+                                                                        t.msecs,
                                                                         (len(content)/(1024*1024))/t.secs))
             if re.match(r'^2\d\d$', resp['status']):
                 m = hashlib.new('md5')