X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/77f5a84ccc2b14438286ed05c6af183b8d8be605..0f5295ae31362eefe182f3a2329b3903d6f82a3b:/sdk/go/keepclient/support.go diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go index 9adbb4878f..8545cb80b8 100644 --- a/sdk/go/keepclient/support.go +++ b/sdk/go/keepclient/support.go @@ -4,17 +4,16 @@ import ( "crypto/md5" "errors" "fmt" - "git.curoverse.com/arvados.git/sdk/go/streamer" "io" "io/ioutil" "log" "math/rand" - "net" "net/http" "os" - "regexp" "strings" - "time" + + "git.curoverse.com/arvados.git/sdk/go/arvadosclient" + "git.curoverse.com/arvados.git/sdk/go/streamer" ) // Function used to emit debug messages. The easiest way to enable @@ -23,8 +22,7 @@ import ( var DebugPrintf = func(string, ...interface{}) {} func init() { - var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$") - if matchTrue.MatchString(os.Getenv("ARVADOS_DEBUG")) { + if arvadosclient.StringBool(os.Getenv("ARVADOS_DEBUG")) { DebugPrintf = log.Printf } } @@ -43,54 +41,6 @@ func Md5String(s string) string { return fmt.Sprintf("%x", md5.Sum([]byte(s))) } -// Set timeouts applicable when connecting to non-disk services -// (assumed to be over the Internet). -func (this *KeepClient) setClientSettingsNonDisk() { - if this.Client.Timeout == 0 { - // Maximum time to wait for a complete response - this.Client.Timeout = 300 * time.Second - - // TCP and TLS connection settings - this.Client.Transport = &http.Transport{ - Dial: (&net.Dialer{ - // The maximum time to wait to set up - // the initial TCP connection. - Timeout: 30 * time.Second, - - // The TCP keep alive heartbeat - // interval. - KeepAlive: 120 * time.Second, - }).Dial, - - TLSHandshakeTimeout: 10 * time.Second, - } - } -} - -// Set timeouts applicable when connecting to keepstore services directly -// (assumed to be on the local network). -func (this *KeepClient) setClientSettingsDisk() { - if this.Client.Timeout == 0 { - // Maximum time to wait for a complete response - this.Client.Timeout = 20 * time.Second - - // TCP and TLS connection timeouts - this.Client.Transport = &http.Transport{ - Dial: (&net.Dialer{ - // The maximum time to wait to set up - // the initial TCP connection. - Timeout: 2 * time.Second, - - // The TCP keep alive heartbeat - // interval. - KeepAlive: 180 * time.Second, - }).Dial, - - TLSHandshakeTimeout: 4 * time.Second, - } - } -} - type svcList struct { Items []keepService `json:"items"` } @@ -118,8 +68,8 @@ func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Rea req.ContentLength = expectedLength if expectedLength > 0 { - // http.Client.Do will close the body ReadCloser when it is - // done with it. + // Do() will close the body ReadCloser when it is done + // with it. req.Body = body } else { // "For client requests, a value of 0 means unknown if Body is @@ -134,7 +84,7 @@ func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Rea req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas)) var resp *http.Response - if resp, err = this.Client.Do(req); err != nil { + if resp, err = this.httpClient().Do(req); err != nil { DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error()) upload_status <- uploadStatus{err, url, 0, 0, ""} return @@ -157,6 +107,9 @@ func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Rea DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url) upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response} } else { + if resp.StatusCode >= 300 && response == "" { + response = resp.Status + } DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response) upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response} } @@ -206,6 +159,8 @@ func (this *KeepClient) putReplicas( retriesRemaining := 1 + this.Retries var retryServers []string + lastError := make(map[string]string) + for retriesRemaining > 0 { retriesRemaining -= 1 next_server = 0 @@ -220,7 +175,12 @@ func (this *KeepClient) putReplicas( active += 1 } else { if active == 0 && retriesRemaining == 0 { - return locator, replicasDone, InsufficientReplicasError + msg := "Could not write sufficient replicas: " + for _, resp := range lastError { + msg += resp + "; " + } + msg = msg[:len(msg)-2] + return locator, replicasDone, InsufficientReplicasError(errors.New(msg)) } else { break } @@ -239,7 +199,16 @@ func (this *KeepClient) putReplicas( replicasDone += status.replicas_stored replicasTodo -= status.replicas_stored locator = status.response - } else if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 || + delete(lastError, status.url) + } else { + msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response) + if len(msg) > 100 { + msg = msg[:100] + } + lastError[status.url] = msg + } + + if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 || (status.statusCode >= 500 && status.statusCode != 503) { // Timeout, too many requests, or other server side failure // Do not retry when status code is 503, which means the keep server is full