X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/58692c916bb6dfe2997838ca4147109d9410c86a..e56ae6aad06c37d5512537047871d7363dd97620:/sdk/go/keepclient/support.go diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go index 90ac3bcb17..33ba8720bc 100644 --- a/sdk/go/keepclient/support.go +++ b/sdk/go/keepclient/support.go @@ -4,15 +4,18 @@ import ( "crypto/md5" "errors" "fmt" - "git.curoverse.com/arvados.git/sdk/go/streamer" "io" "io/ioutil" "log" "math/rand" "net" "net/http" + "os" + "regexp" "strings" "time" + + "git.curoverse.com/arvados.git/sdk/go/streamer" ) // Function used to emit debug messages. The easiest way to enable @@ -20,6 +23,13 @@ import ( // log.Printf to DebugPrintf. var DebugPrintf = func(string, ...interface{}) {} +func init() { + var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$") + if matchTrue.MatchString(os.Getenv("ARVADOS_DEBUG")) { + DebugPrintf = log.Printf + } +} + type keepService struct { Uuid string `json:"uuid"` Hostname string `json:"service_host"` @@ -34,51 +44,47 @@ func Md5String(s string) string { return fmt.Sprintf("%x", md5.Sum([]byte(s))) } -// Set timeouts apply when connecting to keepproxy services (assumed to be over -// the Internet). -func (this *KeepClient) setClientSettingsProxy() { - if this.Client.Timeout == 0 { - // Maximum time to wait for a complete response - this.Client.Timeout = 300 * time.Second - - // TCP and TLS connection settings - this.Client.Transport = &http.Transport{ - Dial: (&net.Dialer{ - // The maximum time to wait to set up - // the initial TCP connection. - Timeout: 30 * time.Second, - - // The TCP keep alive heartbeat - // interval. - KeepAlive: 120 * time.Second, - }).Dial, - - TLSHandshakeTimeout: 10 * time.Second, - } +// Set timeouts applicable when connecting to non-disk services +// (assumed to be over the Internet). +func (*KeepClient) setClientSettingsNonDisk(client *http.Client) { + // Maximum time to wait for a complete response + client.Timeout = 300 * time.Second + + // TCP and TLS connection settings + client.Transport = &http.Transport{ + Dial: (&net.Dialer{ + // The maximum time to wait to set up + // the initial TCP connection. + Timeout: 30 * time.Second, + + // The TCP keep alive heartbeat + // interval. + KeepAlive: 120 * time.Second, + }).Dial, + + TLSHandshakeTimeout: 10 * time.Second, } } -// Set timeouts apply when connecting to keepstore services directly (assumed -// to be on the local network). -func (this *KeepClient) setClientSettingsDisk() { - if this.Client.Timeout == 0 { - // Maximum time to wait for a complete response - this.Client.Timeout = 20 * time.Second - - // TCP and TLS connection timeouts - this.Client.Transport = &http.Transport{ - Dial: (&net.Dialer{ - // The maximum time to wait to set up - // the initial TCP connection. - Timeout: 2 * time.Second, - - // The TCP keep alive heartbeat - // interval. - KeepAlive: 180 * time.Second, - }).Dial, - - TLSHandshakeTimeout: 4 * time.Second, - } +// Set timeouts applicable when connecting to keepstore services directly +// (assumed to be on the local network). +func (*KeepClient) setClientSettingsDisk(client *http.Client) { + // Maximum time to wait for a complete response + client.Timeout = 20 * time.Second + + // TCP and TLS connection timeouts + client.Transport = &http.Transport{ + Dial: (&net.Dialer{ + // The maximum time to wait to set up + // the initial TCP connection. + Timeout: 2 * time.Second, + + // The TCP keep alive heartbeat + // interval. + KeepAlive: 180 * time.Second, + }).Dial, + + TLSHandshakeTimeout: 4 * time.Second, } } @@ -101,7 +107,7 @@ func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Rea var err error var url = fmt.Sprintf("%s/%s", host, hash) if req, err = http.NewRequest("PUT", url, nil); err != nil { - log.Printf("[%08x] Error creating request PUT %v error: %v", requestID, url, err.Error()) + DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error()) upload_status <- uploadStatus{err, url, 0, 0, ""} body.Close() return @@ -126,7 +132,7 @@ func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Rea var resp *http.Response if resp, err = this.Client.Do(req); err != nil { - log.Printf("[%08x] Upload failed %v error: %v", requestID, url, err.Error()) + DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error()) upload_status <- uploadStatus{err, url, 0, 0, ""} return } @@ -142,13 +148,16 @@ func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Rea respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096}) response := strings.TrimSpace(string(respbody)) if err2 != nil && err2 != io.EOF { - log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response) + DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response) upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response} } else if resp.StatusCode == http.StatusOK { - log.Printf("[%08x] Upload %v success", requestID, url) + DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url) upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response} } else { - log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response) + if resp.StatusCode >= 300 && response == "" { + response = resp.Status + } + DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response) upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response} } } @@ -185,40 +194,47 @@ func (this *KeepClient) putReplicas( }() }() - // Desired number of replicas - remaining_replicas := this.Want_replicas + replicasDone := 0 + replicasTodo := this.Want_replicas replicasPerThread := this.replicasPerService if replicasPerThread < 1 { // unlimited or unknown - replicasPerThread = remaining_replicas + replicasPerThread = replicasTodo } retriesRemaining := 1 + this.Retries var retryServers []string + lastError := make(map[string]string) + for retriesRemaining > 0 { retriesRemaining -= 1 next_server = 0 retryServers = []string{} - for remaining_replicas > 0 { - for active*replicasPerThread < remaining_replicas { + for replicasTodo > 0 { + for active*replicasPerThread < replicasTodo { // Start some upload requests if next_server < len(sv) { - log.Printf("[%08x] Begin upload %s to %s", requestID, hash, sv[next_server]) + DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server]) go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID) next_server += 1 active += 1 } else { if active == 0 && retriesRemaining == 0 { - return locator, (this.Want_replicas - remaining_replicas), InsufficientReplicasError + msg := "Could not write sufficient replicas: " + for _, resp := range lastError { + msg += resp + "; " + } + msg = msg[:len(msg)-2] + return locator, replicasDone, InsufficientReplicasError(errors.New(msg)) } else { break } } } - log.Printf("[%08x] Replicas remaining to write: %v active uploads: %v", - requestID, remaining_replicas, active) + DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v", + requestID, replicasTodo, active) // Now wait for something to happen. if active > 0 { @@ -227,9 +243,19 @@ func (this *KeepClient) putReplicas( if status.statusCode == 200 { // good news! - remaining_replicas -= status.replicas_stored + replicasDone += status.replicas_stored + replicasTodo -= status.replicas_stored locator = status.response - } else if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 || + delete(lastError, status.url) + } else { + msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response) + if len(msg) > 100 { + msg = msg[:100] + } + lastError[status.url] = msg + } + + if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 || (status.statusCode >= 500 && status.statusCode != 503) { // Timeout, too many requests, or other server side failure // Do not retry when status code is 503, which means the keep server is full @@ -243,5 +269,5 @@ func (this *KeepClient) putReplicas( sv = retryServers } - return locator, this.Want_replicas, nil + return locator, replicasDone, nil }