11537: Add Via header to proxied keepstore requests.
[arvados.git] / sdk / go / keepclient / support.go
1 package keepclient
2
3 import (
4         "crypto/md5"
5         "errors"
6         "fmt"
7         "io"
8         "io/ioutil"
9         "log"
10         "math/rand"
11         "net"
12         "net/http"
13         "os"
14         "regexp"
15         "strings"
16         "time"
17
18         "git.curoverse.com/arvados.git/sdk/go/streamer"
19 )
20
21 // Function used to emit debug messages. The easiest way to enable
22 // keepclient debug messages in your application is to assign
23 // log.Printf to DebugPrintf.
24 var DebugPrintf = func(string, ...interface{}) {}
25
26 func init() {
27         var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
28         if matchTrue.MatchString(os.Getenv("ARVADOS_DEBUG")) {
29                 DebugPrintf = log.Printf
30         }
31 }
32
33 type keepService struct {
34         Uuid     string `json:"uuid"`
35         Hostname string `json:"service_host"`
36         Port     int    `json:"service_port"`
37         SSL      bool   `json:"service_ssl_flag"`
38         SvcType  string `json:"service_type"`
39         ReadOnly bool   `json:"read_only"`
40 }
41
42 // Md5String returns md5 hash for the bytes in the given string
43 func Md5String(s string) string {
44         return fmt.Sprintf("%x", md5.Sum([]byte(s)))
45 }
46
47 // Set timeouts applicable when connecting to non-disk services
48 // (assumed to be over the Internet).
49 func (*KeepClient) setClientSettingsNonDisk(client *http.Client) {
50         // Maximum time to wait for a complete response
51         client.Timeout = 300 * time.Second
52
53         // TCP and TLS connection settings
54         client.Transport = &http.Transport{
55                 Dial: (&net.Dialer{
56                         // The maximum time to wait to set up
57                         // the initial TCP connection.
58                         Timeout: 30 * time.Second,
59
60                         // The TCP keep alive heartbeat
61                         // interval.
62                         KeepAlive: 120 * time.Second,
63                 }).Dial,
64
65                 TLSHandshakeTimeout: 10 * time.Second,
66         }
67 }
68
69 // Set timeouts applicable when connecting to keepstore services directly
70 // (assumed to be on the local network).
71 func (*KeepClient) setClientSettingsDisk(client *http.Client) {
72         // Maximum time to wait for a complete response
73         client.Timeout = 20 * time.Second
74
75         // TCP and TLS connection timeouts
76         client.Transport = &http.Transport{
77                 Dial: (&net.Dialer{
78                         // The maximum time to wait to set up
79                         // the initial TCP connection.
80                         Timeout: 2 * time.Second,
81
82                         // The TCP keep alive heartbeat
83                         // interval.
84                         KeepAlive: 180 * time.Second,
85                 }).Dial,
86
87                 TLSHandshakeTimeout: 4 * time.Second,
88         }
89 }
90
91 type svcList struct {
92         Items []keepService `json:"items"`
93 }
94
95 type uploadStatus struct {
96         err             error
97         url             string
98         statusCode      int
99         replicas_stored int
100         response        string
101 }
102
103 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
104         upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
105
106         var req *http.Request
107         var err error
108         var url = fmt.Sprintf("%s/%s", host, hash)
109         if req, err = http.NewRequest("PUT", url, nil); err != nil {
110                 DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
111                 upload_status <- uploadStatus{err, url, 0, 0, ""}
112                 body.Close()
113                 return
114         }
115
116         req.ContentLength = expectedLength
117         if expectedLength > 0 {
118                 // http.Client.Do will close the body ReadCloser when it is
119                 // done with it.
120                 req.Body = body
121         } else {
122                 // "For client requests, a value of 0 means unknown if Body is
123                 // not nil."  In this case we do want the body to be empty, so
124                 // don't set req.Body.  However, we still need to close the
125                 // body ReadCloser.
126                 body.Close()
127         }
128
129         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
130         req.Header.Add("Content-Type", "application/octet-stream")
131         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
132
133         var resp *http.Response
134         if resp, err = this.Client.Do(req); err != nil {
135                 DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error())
136                 upload_status <- uploadStatus{err, url, 0, 0, ""}
137                 return
138         }
139
140         rep := 1
141         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
142                 fmt.Sscanf(xr, "%d", &rep)
143         }
144
145         defer resp.Body.Close()
146         defer io.Copy(ioutil.Discard, resp.Body)
147
148         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
149         response := strings.TrimSpace(string(respbody))
150         if err2 != nil && err2 != io.EOF {
151                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
152                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
153         } else if resp.StatusCode == http.StatusOK {
154                 DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url)
155                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
156         } else {
157                 if resp.StatusCode >= 300 && response == "" {
158                         response = resp.Status
159                 }
160                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
161                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
162         }
163 }
164
165 func (this *KeepClient) putReplicas(
166         hash string,
167         tr *streamer.AsyncStream,
168         expectedLength int64) (locator string, replicas int, err error) {
169
170         // Generate an arbitrary ID to identify this specific
171         // transaction in debug logs.
172         requestID := rand.Int31()
173
174         // Calculate the ordering for uploading to servers
175         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
176
177         // The next server to try contacting
178         next_server := 0
179
180         // The number of active writers
181         active := 0
182
183         // Used to communicate status from the upload goroutines
184         upload_status := make(chan uploadStatus)
185         defer func() {
186                 // Wait for any abandoned uploads (e.g., we started
187                 // two uploads and the first replied with replicas=2)
188                 // to finish before closing the status channel.
189                 go func() {
190                         for active > 0 {
191                                 <-upload_status
192                         }
193                         close(upload_status)
194                 }()
195         }()
196
197         replicasDone := 0
198         replicasTodo := this.Want_replicas
199
200         replicasPerThread := this.replicasPerService
201         if replicasPerThread < 1 {
202                 // unlimited or unknown
203                 replicasPerThread = replicasTodo
204         }
205
206         retriesRemaining := 1 + this.Retries
207         var retryServers []string
208
209         lastError := make(map[string]string)
210
211         for retriesRemaining > 0 {
212                 retriesRemaining -= 1
213                 next_server = 0
214                 retryServers = []string{}
215                 for replicasTodo > 0 {
216                         for active*replicasPerThread < replicasTodo {
217                                 // Start some upload requests
218                                 if next_server < len(sv) {
219                                         DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
220                                         go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
221                                         next_server += 1
222                                         active += 1
223                                 } else {
224                                         if active == 0 && retriesRemaining == 0 {
225                                                 msg := "Could not write sufficient replicas: "
226                                                 for _, resp := range lastError {
227                                                         msg += resp + "; "
228                                                 }
229                                                 msg = msg[:len(msg)-2]
230                                                 return locator, replicasDone, InsufficientReplicasError(errors.New(msg))
231                                         } else {
232                                                 break
233                                         }
234                                 }
235                         }
236                         DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v",
237                                 requestID, replicasTodo, active)
238
239                         // Now wait for something to happen.
240                         if active > 0 {
241                                 status := <-upload_status
242                                 active -= 1
243
244                                 if status.statusCode == 200 {
245                                         // good news!
246                                         replicasDone += status.replicas_stored
247                                         replicasTodo -= status.replicas_stored
248                                         locator = status.response
249                                         delete(lastError, status.url)
250                                 } else {
251                                         msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response)
252                                         if len(msg) > 100 {
253                                                 msg = msg[:100]
254                                         }
255                                         lastError[status.url] = msg
256                                 }
257
258                                 if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
259                                         (status.statusCode >= 500 && status.statusCode != 503) {
260                                         // Timeout, too many requests, or other server side failure
261                                         // Do not retry when status code is 503, which means the keep server is full
262                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
263                                 }
264                         } else {
265                                 break
266                         }
267                 }
268
269                 sv = retryServers
270         }
271
272         return locator, replicasDone, nil
273 }