X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/b17b1ad22bd3cf518eb1b3657f62768b60c7db25..d89fc97f4a7ee74edcb1a5856fec560c748b9086:/services/datamanager/keep/keep.go

diff --git a/services/datamanager/keep/keep.go b/services/datamanager/keep/keep.go
index c16512069e..651c869ef0 100644
--- a/services/datamanager/keep/keep.go
+++ b/services/datamanager/keep/keep.go
@@ -83,7 +83,7 @@ func init() {
 	flag.StringVar(&serviceType,
 		"service-type",
 		"disk",
-		"Supported keepservice type. Default is disk.")
+		"Operate only on keep_services with the specified service_type, ignoring all others.")
 }
 
 // String
@@ -118,7 +118,7 @@ func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServer
 // GetKeepServers from api server
 func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error) {
 	sdkParams := arvadosclient.Dict{
-		"filters": [][]string{[]string{"service_type", "!=", "proxy"}},
+		"filters": [][]string{{"service_type", "!=", "proxy"}},
 	}
 	if params.Limit > 0 {
 		sdkParams["limit"] = params.Limit
@@ -131,18 +131,17 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error
 		return
 	}
 
-	// Currently, only "disk" types are supported. Stop if any other service types are found.
-	foundSupportedServieType := false
+	var keepServers []ServerAddress
 	for _, server := range sdkResponse.KeepServers {
 		if server.ServiceType == serviceType {
-			foundSupportedServieType = true
+			keepServers = append(keepServers, server)
 		} else {
-			log.Printf("Ignore unsupported service type: %v", server.ServiceType)
+			log.Printf("Skipping keep_service %q because its service_type %q does not match -service-type=%q", server, server.ServiceType, serviceType)
 		}
 	}
 
-	if !foundSupportedServieType {
-		return results, fmt.Errorf("Found no keepservices with the supported type %v", serviceType)
+	if len(keepServers) == 0 {
+		return results, fmt.Errorf("Found no keepservices with the service type %v", serviceType)
 	}
 
 	if params.Logger != nil {
@@ -151,6 +150,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error
 			keepInfo["num_keep_servers_available"] = sdkResponse.ItemsAvailable
 			keepInfo["num_keep_servers_received"] = len(sdkResponse.KeepServers)
 			keepInfo["keep_servers"] = sdkResponse.KeepServers
+			keepInfo["indexable_keep_servers"] = keepServers
 		})
 	}
 
@@ -160,7 +160,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error
 		return results, fmt.Errorf("Did not receive all available keep servers: %+v", sdkResponse)
 	}
 
-	results.KeepServerIndexToAddress = sdkResponse.KeepServers
+	results.KeepServerIndexToAddress = keepServers
 	results.KeepServerAddressToIndex = make(map[ServerAddress]int)
 	for i, address := range results.KeepServerIndexToAddress {
 		results.KeepServerAddressToIndex[address] = i
@@ -170,7 +170,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error
 
 	// Send off all the index requests concurrently
 	responseChan := make(chan ServerResponse)
-	for _, keepServer := range sdkResponse.KeepServers {
+	for _, keepServer := range results.KeepServerIndexToAddress {
 		// The above keepsServer variable is reused for each iteration, so
 		// it would be shared across all goroutines. This would result in
 		// us querying one server n times instead of n different servers
@@ -188,7 +188,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error
 	results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
 
 	// Read all the responses
-	for i := range sdkResponse.KeepServers {
+	for i := range results.KeepServerIndexToAddress {
 		_ = i // Here to prevent go from complaining.
 		response := <-responseChan
 
@@ -430,13 +430,23 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
 		return
 	}
 
-	blockInfo.Mtime, err = strconv.ParseInt(tokens[1], 10, 64)
+	var ns int64
+	ns, err = strconv.ParseInt(tokens[1], 10, 64)
 	if err != nil {
 		return
 	}
-	blockInfo.Digest =
-		blockdigest.DigestWithSize{Digest: locator.Digest,
-			Size: uint32(locator.Size)}
+	if ns < 1e12 {
+		// An old version of keepstore is giving us timestamps
+		// in seconds instead of nanoseconds. (This threshold
+		// correctly handles all times between 1970-01-02 and
+		// 33658-09-27.)
+		ns = ns * 1e9
+	}
+	blockInfo.Mtime = ns
+	blockInfo.Digest = blockdigest.DigestWithSize{
+		Digest: locator.Digest,
+		Size:   uint32(locator.Size),
+	}
 	return
 }
 
@@ -466,13 +476,29 @@ type TrashRequest struct {
 type TrashList []TrashRequest
 
 // SendTrashLists to trash queue
-func SendTrashLists(kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
+func SendTrashLists(arvLogger *logger.Logger, kc *keepclient.KeepClient, spl map[string]TrashList, dryRun bool) (errs []error) {
 	count := 0
 	barrier := make(chan error)
 
 	client := kc.Client
 
 	for url, v := range spl {
+		if arvLogger != nil {
+			// We need a local variable because Update doesn't call our mutator func until later,
+			// when our list variable might have been reused by the next loop iteration.
+			url := url
+			trashLen := len(v)
+			arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
+				trashListInfo := logger.GetOrCreateMap(p, "trash_list_len")
+				trashListInfo[url] = trashLen
+			})
+		}
+
+		if dryRun {
+			log.Printf("dry run, not sending trash list to service %s with %d blocks", url, len(v))
+			continue
+		}
+
 		count++
 		log.Printf("Sending trash list to %v", url)
 
@@ -512,7 +538,6 @@ func SendTrashLists(kc *keepclient.KeepClient, spl map[string]TrashList) (errs [
 			barrier <- nil
 		}
 	})(url, v)
-
 	}
 
 	for i := 0; i < count; i++ {
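
Note on the loop-variable capture addressed above: in Go versions before 1.22, the variables declared by a "for ... range" clause are reused across iterations, so any goroutine or closure that captures them may observe only the last-assigned value. The diff guards against this twice, by re-declaring "url := url" before handing a mutator closure to arvLogger.Update, and (per the comment in GetKeepServers) by copying keepServer inside the loop before launching each index-request goroutine. The following is a minimal standalone sketch of the pitfall and the shadowing idiom, not code from this repository; the server names are made up for illustration:

package main

import (
	"fmt"
	"sync"
)

func main() {
	servers := []string{"keep0", "keep1", "keep2"} // hypothetical server names

	var wg sync.WaitGroup
	for _, s := range servers {
		s := s // shadow the loop variable so each goroutine captures its own copy
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Without the shadowing line above, under pre-1.22 Go semantics all
			// three goroutines could print the same, last-assigned server name.
			fmt.Println("querying", s)
		}()
	}
	wg.Wait()
}

Passing the values as arguments to the goroutine's function, as SendTrashLists does with ")(url, v)", achieves the same effect as the shadowing declaration.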