Merge branch '16723-kill-vs-requeue'
[arvados.git] / sdk / go / keepclient / support.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 package keepclient
6
7 import (
8         "crypto/md5"
9         "errors"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "log"
14         "net/http"
15         "os"
16         "strings"
17
18         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
19 )
20
// DebugPrintf is the function keepclient calls to emit debug
// messages. It is a no-op by default. The easiest way to enable
// keepclient debug messages in your application is to assign
// log.Printf to DebugPrintf.
var DebugPrintf = func(format string, args ...interface{}) {}
25
26 func init() {
27         if arvadosclient.StringBool(os.Getenv("ARVADOS_DEBUG")) {
28                 DebugPrintf = log.Printf
29         }
30 }
31
// keepService is one service record as decoded from JSON. Each field
// is populated from the JSON key named in its struct tag.
type keepService struct {
	Uuid     string `json:"uuid"`             // from "uuid"
	Hostname string `json:"service_host"`     // from "service_host"
	Port     int    `json:"service_port"`     // from "service_port"
	SSL      bool   `json:"service_ssl_flag"` // from "service_ssl_flag"
	SvcType  string `json:"service_type"`     // from "service_type"
	ReadOnly bool   `json:"read_only"`        // from "read_only"
}
40
// Md5String computes the MD5 digest of the bytes of s and returns it
// as a 32-character lowercase hexadecimal string.
func Md5String(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest)
}
45
46 type svcList struct {
47         Items []keepService `json:"items"`
48 }
49
// uploadStatus is the outcome of a single upload attempt, as sent by
// uploadToKeepServer on its status channel.
//
// The field order is significant: values are built with positional
// (unkeyed) composite literals.
type uploadStatus struct {
	err             error  // nil on success, otherwise what went wrong
	url             string // URL the PUT request was addressed to
	statusCode      int    // HTTP status code; 0 if no response was received
	replicas_stored int    // replica count reported by the server (defaults to 1)
	response        string // trimmed response body, or error/status text
}
57
58 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Reader,
59         upload_status chan<- uploadStatus, expectedLength int64, reqid string) {
60
61         var req *http.Request
62         var err error
63         var url = fmt.Sprintf("%s/%s", host, hash)
64         if req, err = http.NewRequest("PUT", url, nil); err != nil {
65                 DebugPrintf("DEBUG: [%s] Error creating request PUT %v error: %v", reqid, url, err.Error())
66                 upload_status <- uploadStatus{err, url, 0, 0, ""}
67                 return
68         }
69
70         req.ContentLength = expectedLength
71         if expectedLength > 0 {
72                 req.Body = ioutil.NopCloser(body)
73         } else {
74                 // "For client requests, a value of 0 means unknown if
75                 // Body is not nil."  In this case we do want the body
76                 // to be empty, so don't set req.Body.
77         }
78
79         req.Header.Add("X-Request-Id", reqid)
80         req.Header.Add("Authorization", "OAuth2 "+this.Arvados.ApiToken)
81         req.Header.Add("Content-Type", "application/octet-stream")
82         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
83         if len(this.StorageClasses) > 0 {
84                 req.Header.Add("X-Keep-Storage-Classes", strings.Join(this.StorageClasses, ", "))
85         }
86
87         var resp *http.Response
88         if resp, err = this.httpClient().Do(req); err != nil {
89                 DebugPrintf("DEBUG: [%s] Upload failed %v error: %v", reqid, url, err.Error())
90                 upload_status <- uploadStatus{err, url, 0, 0, err.Error()}
91                 return
92         }
93
94         rep := 1
95         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
96                 fmt.Sscanf(xr, "%d", &rep)
97         }
98
99         defer resp.Body.Close()
100         defer io.Copy(ioutil.Discard, resp.Body)
101
102         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
103         response := strings.TrimSpace(string(respbody))
104         if err2 != nil && err2 != io.EOF {
105                 DebugPrintf("DEBUG: [%s] Upload %v error: %v response: %v", reqid, url, err2.Error(), response)
106                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
107         } else if resp.StatusCode == http.StatusOK {
108                 DebugPrintf("DEBUG: [%s] Upload %v success", reqid, url)
109                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
110         } else {
111                 if resp.StatusCode >= 300 && response == "" {
112                         response = resp.Status
113                 }
114                 DebugPrintf("DEBUG: [%s] Upload %v error: %v response: %v", reqid, url, resp.StatusCode, response)
115                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
116         }
117 }
118
119 func (this *KeepClient) putReplicas(
120         hash string,
121         getReader func() io.Reader,
122         expectedLength int64) (locator string, replicas int, err error) {
123
124         reqid := this.getRequestID()
125
126         // Calculate the ordering for uploading to servers
127         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
128
129         // The next server to try contacting
130         nextServer := 0
131
132         // The number of active writers
133         active := 0
134
135         // Used to communicate status from the upload goroutines
136         upload_status := make(chan uploadStatus)
137         defer func() {
138                 // Wait for any abandoned uploads (e.g., we started
139                 // two uploads and the first replied with replicas=2)
140                 // to finish before closing the status channel.
141                 go func() {
142                         for active > 0 {
143                                 <-upload_status
144                         }
145                         close(upload_status)
146                 }()
147         }()
148
149         replicasDone := 0
150         replicasTodo := this.Want_replicas
151
152         replicasPerThread := this.replicasPerService
153         if replicasPerThread < 1 {
154                 // unlimited or unknown
155                 replicasPerThread = replicasTodo
156         }
157
158         retriesRemaining := 1 + this.Retries
159         var retryServers []string
160
161         lastError := make(map[string]string)
162
163         for retriesRemaining > 0 {
164                 retriesRemaining -= 1
165                 nextServer = 0
166                 retryServers = []string{}
167                 for replicasTodo > 0 {
168                         for active*replicasPerThread < replicasTodo {
169                                 // Start some upload requests
170                                 if nextServer < len(sv) {
171                                         DebugPrintf("DEBUG: [%s] Begin upload %s to %s", reqid, hash, sv[nextServer])
172                                         go this.uploadToKeepServer(sv[nextServer], hash, getReader(), upload_status, expectedLength, reqid)
173                                         nextServer += 1
174                                         active += 1
175                                 } else {
176                                         if active == 0 && retriesRemaining == 0 {
177                                                 msg := "Could not write sufficient replicas: "
178                                                 for _, resp := range lastError {
179                                                         msg += resp + "; "
180                                                 }
181                                                 msg = msg[:len(msg)-2]
182                                                 return locator, replicasDone, InsufficientReplicasError(errors.New(msg))
183                                         }
184                                         break
185                                 }
186                         }
187                         DebugPrintf("DEBUG: [%s] Replicas remaining to write: %v active uploads: %v",
188                                 reqid, replicasTodo, active)
189
190                         // Now wait for something to happen.
191                         if active > 0 {
192                                 status := <-upload_status
193                                 active -= 1
194
195                                 if status.statusCode == 200 {
196                                         // good news!
197                                         replicasDone += status.replicas_stored
198                                         replicasTodo -= status.replicas_stored
199                                         locator = status.response
200                                         delete(lastError, status.url)
201                                 } else {
202                                         msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response)
203                                         if len(msg) > 100 {
204                                                 msg = msg[:100]
205                                         }
206                                         lastError[status.url] = msg
207                                 }
208
209                                 if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
210                                         (status.statusCode >= 500 && status.statusCode != 503) {
211                                         // Timeout, too many requests, or other server side failure
212                                         // Do not retry when status code is 503, which means the keep server is full
213                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
214                                 }
215                         } else {
216                                 break
217                         }
218                 }
219
220                 sv = retryServers
221         }
222
223         return locator, replicasDone, nil
224 }