Merge branch 'master' into 7661-fuse-by-pdh
[arvados.git] / sdk / go / keepclient / support.go
1 package keepclient
2
3 import (
4         "crypto/md5"
5         "encoding/json"
6         "errors"
7         "fmt"
8         "git.curoverse.com/arvados.git/sdk/go/streamer"
9         "io"
10         "io/ioutil"
11         "log"
12         "math/rand"
13         "net"
14         "net/http"
15         "strings"
16         "time"
17 )
18
// DebugPrintf is the hook keepclient uses to emit debug messages. It
// is a no-op by default; the easiest way to see the messages in your
// application is to assign log.Printf to it.
var DebugPrintf = func(format string, args ...interface{}) {}
23
// keepService is one entry of the keep service list, decoded from JSON
// using the keys named in the field tags.
//
// loadKeepServers builds each service's root URL from SSL (https when
// true, http otherwise), Hostname and Port, and keys its root maps by
// Uuid. A SvcType of "proxy" switches the client to proxy settings; any
// writable service whose SvcType is not "disk" makes the
// replicas-per-service count unknown. Services with ReadOnly set are
// left out of the writable roots.
24 type keepService struct {
25         Uuid     string `json:"uuid"`
26         Hostname string `json:"service_host"`
27         Port     int    `json:"service_port"`
28         SSL      bool   `json:"service_ssl_flag"`
29         SvcType  string `json:"service_type"`
30         ReadOnly bool   `json:"read_only"`
31 }
32
// Md5String computes the MD5 digest of the bytes of s and returns it
// as a lowercase hexadecimal string.
func Md5String(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest)
}
37
38 // Set timeouts apply when connecting to keepproxy services (assumed to be over
39 // the Internet).
40 func (this *KeepClient) setClientSettingsProxy() {
41         if this.Client.Timeout == 0 {
42                 // Maximum time to wait for a complete response
43                 this.Client.Timeout = 300 * time.Second
44
45                 // TCP and TLS connection settings
46                 this.Client.Transport = &http.Transport{
47                         Dial: (&net.Dialer{
48                                 // The maximum time to wait to set up
49                                 // the initial TCP connection.
50                                 Timeout: 30 * time.Second,
51
52                                 // The TCP keep alive heartbeat
53                                 // interval.
54                                 KeepAlive: 120 * time.Second,
55                         }).Dial,
56
57                         TLSHandshakeTimeout: 10 * time.Second,
58                 }
59         }
60 }
61
62 // Set timeouts apply when connecting to keepstore services directly (assumed
63 // to be on the local network).
64 func (this *KeepClient) setClientSettingsDisk() {
65         if this.Client.Timeout == 0 {
66                 // Maximum time to wait for a complete response
67                 this.Client.Timeout = 20 * time.Second
68
69                 // TCP and TLS connection timeouts
70                 this.Client.Transport = &http.Transport{
71                         Dial: (&net.Dialer{
72                                 // The maximum time to wait to set up
73                                 // the initial TCP connection.
74                                 Timeout: 2 * time.Second,
75
76                                 // The TCP keep alive heartbeat
77                                 // interval.
78                                 KeepAlive: 180 * time.Second,
79                         }).Dial,
80
81                         TLSHandshakeTimeout: 4 * time.Second,
82                 }
83         }
84 }
85
// svcList is the JSON envelope for a list of keep services: the
// entries are decoded from the "items" key. It is the decode target
// used by DiscoverKeepServers and LoadKeepServicesFromJSON.
86 type svcList struct {
87         Items []keepService `json:"items"`
88 }
89
90 // DiscoverKeepServers gets list of available keep services from api server
91 func (this *KeepClient) DiscoverKeepServers() error {
92         var list svcList
93
94         // Get keep services from api server
95         err := this.Arvados.Call("GET", "keep_services", "", "accessible", nil, &list)
96         if err != nil {
97                 return err
98         }
99
100         return this.loadKeepServers(list)
101 }
102
103 // LoadKeepServicesFromJSON gets list of available keep services from given JSON
104 func (this *KeepClient) LoadKeepServicesFromJSON(services string) error {
105         var list svcList
106
107         // Load keep services from given json
108         dec := json.NewDecoder(strings.NewReader(services))
109         if err := dec.Decode(&list); err != nil {
110                 return err
111         }
112
113         return this.loadKeepServers(list)
114 }
115
116 // loadKeepServers
117 func (this *KeepClient) loadKeepServers(list svcList) error {
118         listed := make(map[string]bool)
119         localRoots := make(map[string]string)
120         gatewayRoots := make(map[string]string)
121         writableLocalRoots := make(map[string]string)
122
123         // replicasPerService is 1 for disks; unknown or unlimited otherwise
124         this.replicasPerService = 1
125         this.Using_proxy = false
126
127         for _, service := range list.Items {
128                 scheme := "http"
129                 if service.SSL {
130                         scheme = "https"
131                 }
132                 url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
133
134                 // Skip duplicates
135                 if listed[url] {
136                         continue
137                 }
138                 listed[url] = true
139
140                 localRoots[service.Uuid] = url
141                 if service.SvcType == "proxy" {
142                         this.Using_proxy = true
143                 }
144
145                 if service.ReadOnly == false {
146                         writableLocalRoots[service.Uuid] = url
147                         if service.SvcType != "disk" {
148                                 this.replicasPerService = 0
149                         }
150                 }
151
152                 // Gateway services are only used when specified by
153                 // UUID, so there's nothing to gain by filtering them
154                 // by service type. Including all accessible services
155                 // (gateway and otherwise) merely accommodates more
156                 // service configurations.
157                 gatewayRoots[service.Uuid] = url
158         }
159
160         if this.Using_proxy {
161                 this.setClientSettingsProxy()
162         } else {
163                 this.setClientSettingsDisk()
164         }
165
166         this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
167         return nil
168 }
169
// uploadStatus is the single result that each uploadToKeepServer call
// sends on its status channel.
//
//   - err: nil when the server answered 200 OK and the response body
//     was read; otherwise the request, read, or HTTP status error.
//   - url: the PUT URL, formed as host + "/" + hash.
//   - statusCode: the HTTP status code, or 0 when no response was
//     received.
//   - replicas_stored: the count parsed from the X_Keep_Replicas_Stored
//     response header, defaulting to 1 when the header is absent.
//   - response: the whitespace-trimmed response body (at most 4096
//     bytes are read).
//
// Values are built with positional struct literals, so the field order
// must not change.
170 type uploadStatus struct {
171         err             error
172         url             string
173         statusCode      int
174         replicas_stored int
175         response        string
176 }
177
178 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
179         upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
180
181         var req *http.Request
182         var err error
183         var url = fmt.Sprintf("%s/%s", host, hash)
184         if req, err = http.NewRequest("PUT", url, nil); err != nil {
185                 log.Printf("[%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
186                 upload_status <- uploadStatus{err, url, 0, 0, ""}
187                 body.Close()
188                 return
189         }
190
191         req.ContentLength = expectedLength
192         if expectedLength > 0 {
193                 // http.Client.Do will close the body ReadCloser when it is
194                 // done with it.
195                 req.Body = body
196         } else {
197                 // "For client requests, a value of 0 means unknown if Body is
198                 // not nil."  In this case we do want the body to be empty, so
199                 // don't set req.Body.  However, we still need to close the
200                 // body ReadCloser.
201                 body.Close()
202         }
203
204         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
205         req.Header.Add("Content-Type", "application/octet-stream")
206         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
207
208         var resp *http.Response
209         if resp, err = this.Client.Do(req); err != nil {
210                 log.Printf("[%08x] Upload failed %v error: %v", requestID, url, err.Error())
211                 upload_status <- uploadStatus{err, url, 0, 0, ""}
212                 return
213         }
214
215         rep := 1
216         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
217                 fmt.Sscanf(xr, "%d", &rep)
218         }
219
220         defer resp.Body.Close()
221         defer io.Copy(ioutil.Discard, resp.Body)
222
223         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
224         response := strings.TrimSpace(string(respbody))
225         if err2 != nil && err2 != io.EOF {
226                 log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
227                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
228         } else if resp.StatusCode == http.StatusOK {
229                 log.Printf("[%08x] Upload %v success", requestID, url)
230                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
231         } else {
232                 log.Printf("[%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
233                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
234         }
235 }
236
237 func (this *KeepClient) putReplicas(
238         hash string,
239         tr *streamer.AsyncStream,
240         expectedLength int64) (locator string, replicas int, err error) {
241
242         // Generate an arbitrary ID to identify this specific
243         // transaction in debug logs.
244         requestID := rand.Int31()
245
246         // Calculate the ordering for uploading to servers
247         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
248
249         // The next server to try contacting
250         next_server := 0
251
252         // The number of active writers
253         active := 0
254
255         // Used to communicate status from the upload goroutines
256         upload_status := make(chan uploadStatus)
257         defer func() {
258                 // Wait for any abandoned uploads (e.g., we started
259                 // two uploads and the first replied with replicas=2)
260                 // to finish before closing the status channel.
261                 go func() {
262                         for active > 0 {
263                                 <-upload_status
264                         }
265                         close(upload_status)
266                 }()
267         }()
268
269         // Desired number of replicas
270         remaining_replicas := this.Want_replicas
271
272         replicasPerThread := this.replicasPerService
273         if replicasPerThread < 1 {
274                 // unlimited or unknown
275                 replicasPerThread = remaining_replicas
276         }
277
278         retriesRemaining := 1 + this.Retries
279         var retryServers []string
280
281         for retriesRemaining > 0 {
282                 retriesRemaining -= 1
283                 next_server = 0
284                 retryServers = []string{}
285                 for remaining_replicas > 0 {
286                         for active*replicasPerThread < remaining_replicas {
287                                 // Start some upload requests
288                                 if next_server < len(sv) {
289                                         log.Printf("[%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
290                                         go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
291                                         next_server += 1
292                                         active += 1
293                                 } else {
294                                         if active == 0 && retriesRemaining == 0 {
295                                                 return locator, (this.Want_replicas - remaining_replicas), InsufficientReplicasError
296                                         } else {
297                                                 break
298                                         }
299                                 }
300                         }
301                         log.Printf("[%08x] Replicas remaining to write: %v active uploads: %v",
302                                 requestID, remaining_replicas, active)
303
304                         // Now wait for something to happen.
305                         if active > 0 {
306                                 status := <-upload_status
307                                 active -= 1
308
309                                 if status.statusCode == 200 {
310                                         // good news!
311                                         remaining_replicas -= status.replicas_stored
312                                         locator = status.response
313                                 } else if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
314                                         (status.statusCode >= 500 && status.statusCode != 503) {
315                                         // Timeout, too many requests, or other server side failure
316                                         // Do not retry when status code is 503, which means the keep server is full
317                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
318                                 }
319                         } else {
320                                 break
321                         }
322                 }
323
324                 sv = retryServers
325         }
326
327         return locator, this.Want_replicas, nil
328 }