5824: Merge branch 'master' into 5824-keep-web
[arvados.git] / sdk / go / keepclient / support.go
1 package keepclient
2
3 import (
4         "crypto/md5"
5         "encoding/json"
6         "errors"
7         "fmt"
8         "git.curoverse.com/arvados.git/sdk/go/streamer"
9         "io"
10         "io/ioutil"
11         "log"
12         "net"
13         "net/http"
14         "strings"
15         "time"
16 )
17
// keepService is one entry of the "items" array in a keep_services
// listing, decoded from JSON.
type keepService struct {
	// Uuid identifies the service; it is the key used for the service
	// root maps.
	Uuid string `json:"uuid"`

	// Hostname and Port are combined into the service's root URL.
	Hostname string `json:"service_host"`
	Port     int    `json:"service_port"`

	// SSL selects the "https" scheme for the root URL when true,
	// "http" otherwise.
	SSL bool `json:"service_ssl_flag"`

	// SvcType is the service type string; this file compares it
	// against "proxy" and "disk".
	SvcType string `json:"service_type"`

	// ReadOnly services are left out of the writable roots.
	ReadOnly bool `json:"read_only"`
}
26
// Md5String computes the MD5 digest of the bytes of s and returns it
// formatted as a lowercase hexadecimal string.
func Md5String(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest)
}
31
32 // Set timeouts apply when connecting to keepproxy services (assumed to be over
33 // the Internet).
34 func (this *KeepClient) setClientSettingsProxy() {
35         if this.Client.Timeout == 0 {
36                 // Maximum time to wait for a complete response
37                 this.Client.Timeout = 300 * time.Second
38
39                 // TCP and TLS connection settings
40                 this.Client.Transport = &http.Transport{
41                         Dial: (&net.Dialer{
42                                 // The maximum time to wait to set up
43                                 // the initial TCP connection.
44                                 Timeout: 30 * time.Second,
45
46                                 // The TCP keep alive heartbeat
47                                 // interval.
48                                 KeepAlive: 120 * time.Second,
49                         }).Dial,
50
51                         TLSHandshakeTimeout: 10 * time.Second,
52                 }
53         }
54 }
55
56 // Set timeouts apply when connecting to keepstore services directly (assumed
57 // to be on the local network).
58 func (this *KeepClient) setClientSettingsDisk() {
59         if this.Client.Timeout == 0 {
60                 // Maximum time to wait for a complete response
61                 this.Client.Timeout = 20 * time.Second
62
63                 // TCP and TLS connection timeouts
64                 this.Client.Transport = &http.Transport{
65                         Dial: (&net.Dialer{
66                                 // The maximum time to wait to set up
67                                 // the initial TCP connection.
68                                 Timeout: 2 * time.Second,
69
70                                 // The TCP keep alive heartbeat
71                                 // interval.
72                                 KeepAlive: 180 * time.Second,
73                         }).Dial,
74
75                         TLSHandshakeTimeout: 4 * time.Second,
76                 }
77         }
78 }
79
80 type svcList struct {
81         Items []keepService `json:"items"`
82 }
83
84 // DiscoverKeepServers gets list of available keep services from api server
85 func (this *KeepClient) DiscoverKeepServers() error {
86         var list svcList
87
88         // Get keep services from api server
89         err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
90         if err != nil {
91                 return err
92         }
93
94         return this.loadKeepServers(list)
95 }
96
97 // LoadKeepServicesFromJSON gets list of available keep services from given JSON
98 func (this *KeepClient) LoadKeepServicesFromJSON(services string) error {
99         var list svcList
100
101         // Load keep services from given json
102         dec := json.NewDecoder(strings.NewReader(services))
103         if err := dec.Decode(&list); err != nil {
104                 return err
105         }
106
107         return this.loadKeepServers(list)
108 }
109
110 // loadKeepServers
111 func (this *KeepClient) loadKeepServers(list svcList) error {
112         listed := make(map[string]bool)
113         localRoots := make(map[string]string)
114         gatewayRoots := make(map[string]string)
115         writableLocalRoots := make(map[string]string)
116
117         // replicasPerService is 1 for disks; unknown or unlimited otherwise
118         this.replicasPerService = 1
119         this.Using_proxy = false
120
121         for _, service := range list.Items {
122                 scheme := "http"
123                 if service.SSL {
124                         scheme = "https"
125                 }
126                 url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
127
128                 // Skip duplicates
129                 if listed[url] {
130                         continue
131                 }
132                 listed[url] = true
133
134                 localRoots[service.Uuid] = url
135                 if service.SvcType == "proxy" {
136                         this.Using_proxy = true
137                 }
138
139                 if service.ReadOnly == false {
140                         writableLocalRoots[service.Uuid] = url
141                         if service.SvcType != "disk" {
142                                 this.replicasPerService = 0
143                         }
144                 }
145
146                 // Gateway services are only used when specified by
147                 // UUID, so there's nothing to gain by filtering them
148                 // by service type. Including all accessible services
149                 // (gateway and otherwise) merely accommodates more
150                 // service configurations.
151                 gatewayRoots[service.Uuid] = url
152         }
153
154         if this.Using_proxy {
155                 this.setClientSettingsProxy()
156         } else {
157                 this.setClientSettingsDisk()
158         }
159
160         this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
161         return nil
162 }
163
// uploadStatus is the outcome of one upload attempt, sent from an
// upload goroutine back to putReplicas. Values are built with
// positional literals, so the field order must not change.
type uploadStatus struct {
	// err is nil on success, otherwise the failure that occurred.
	err error

	// url is the full request URL (service root, "/", block hash).
	url string

	// statusCode is the HTTP response status, or 0 when no response
	// was received.
	statusCode int

	// replicas_stored is the replica count reported for this upload.
	replicas_stored int

	// response is the whitespace-trimmed response body.
	response string
}
171
172 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
173         upload_status chan<- uploadStatus, expectedLength int64, requestId string) {
174
175         var req *http.Request
176         var err error
177         var url = fmt.Sprintf("%s/%s", host, hash)
178         if req, err = http.NewRequest("PUT", url, nil); err != nil {
179                 log.Printf("[%v] Error creating request PUT %v error: %v", requestId, url, err.Error())
180                 upload_status <- uploadStatus{err, url, 0, 0, ""}
181                 body.Close()
182                 return
183         }
184
185         req.ContentLength = expectedLength
186         if expectedLength > 0 {
187                 // http.Client.Do will close the body ReadCloser when it is
188                 // done with it.
189                 req.Body = body
190         } else {
191                 // "For client requests, a value of 0 means unknown if Body is
192                 // not nil."  In this case we do want the body to be empty, so
193                 // don't set req.Body.  However, we still need to close the
194                 // body ReadCloser.
195                 body.Close()
196         }
197
198         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
199         req.Header.Add("Content-Type", "application/octet-stream")
200         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
201
202         var resp *http.Response
203         if resp, err = this.Client.Do(req); err != nil {
204                 log.Printf("[%v] Upload failed %v error: %v", requestId, url, err.Error())
205                 upload_status <- uploadStatus{err, url, 0, 0, ""}
206                 return
207         }
208
209         rep := 1
210         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
211                 fmt.Sscanf(xr, "%d", &rep)
212         }
213
214         defer resp.Body.Close()
215         defer io.Copy(ioutil.Discard, resp.Body)
216
217         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
218         response := strings.TrimSpace(string(respbody))
219         if err2 != nil && err2 != io.EOF {
220                 log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
221                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
222         } else if resp.StatusCode == http.StatusOK {
223                 log.Printf("[%v] Upload %v success", requestId, url)
224                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
225         } else {
226                 log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
227                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
228         }
229 }
230
231 func (this *KeepClient) putReplicas(
232         hash string,
233         tr *streamer.AsyncStream,
234         expectedLength int64) (locator string, replicas int, err error) {
235
236         // Take the hash of locator and timestamp in order to identify this
237         // specific transaction in log statements.
238         requestId := fmt.Sprintf("%x", md5.Sum([]byte(hash+time.Now().String())))[0:8]
239
240         // Calculate the ordering for uploading to servers
241         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
242
243         // The next server to try contacting
244         next_server := 0
245
246         // The number of active writers
247         active := 0
248
249         // Used to communicate status from the upload goroutines
250         upload_status := make(chan uploadStatus)
251         defer func() {
252                 // Wait for any abandoned uploads (e.g., we started
253                 // two uploads and the first replied with replicas=2)
254                 // to finish before closing the status channel.
255                 go func() {
256                         for active > 0 {
257                                 <-upload_status
258                         }
259                         close(upload_status)
260                 }()
261         }()
262
263         // Desired number of replicas
264         remaining_replicas := this.Want_replicas
265
266         replicasPerThread := this.replicasPerService
267         if replicasPerThread < 1 {
268                 // unlimited or unknown
269                 replicasPerThread = remaining_replicas
270         }
271
272         retriesRemaining := 1 + this.Retries
273         var retryServers []string
274
275         for retriesRemaining > 0 {
276                 retriesRemaining -= 1
277                 next_server = 0
278                 retryServers = []string{}
279                 for remaining_replicas > 0 {
280                         for active*replicasPerThread < remaining_replicas {
281                                 // Start some upload requests
282                                 if next_server < len(sv) {
283                                         log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
284                                         go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestId)
285                                         next_server += 1
286                                         active += 1
287                                 } else {
288                                         if active == 0 && retriesRemaining == 0 {
289                                                 return locator, (this.Want_replicas - remaining_replicas), InsufficientReplicasError
290                                         } else {
291                                                 break
292                                         }
293                                 }
294                         }
295                         log.Printf("[%v] Replicas remaining to write: %v active uploads: %v",
296                                 requestId, remaining_replicas, active)
297
298                         // Now wait for something to happen.
299                         if active > 0 {
300                                 status := <-upload_status
301                                 active -= 1
302
303                                 if status.statusCode == 200 {
304                                         // good news!
305                                         remaining_replicas -= status.replicas_stored
306                                         locator = status.response
307                                 } else if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
308                                         (status.statusCode >= 500 && status.statusCode != 503) {
309                                         // Timeout, too many requests, or other server side failure
310                                         // Do not retry when status code is 503, which means the keep server is full
311                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
312                                 }
313                         } else {
314                                 break
315                         }
316                 }
317
318                 sv = retryServers
319         }
320
321         return locator, this.Want_replicas, nil
322 }