13111: Propagate errors in Child().
[arvados.git] / sdk / go / keepclient / support.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 package keepclient
6
7 import (
8         "crypto/md5"
9         "errors"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "log"
14         "math/rand"
15         "net/http"
16         "os"
17         "strings"
18
19         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
20 )
21
// DebugPrintf is the hook keepclient calls to emit debug messages.
// It defaults to a no-op; to see debug output from your application,
// assign log.Printf (or any function with the same signature) to it.
var DebugPrintf = func(format string, args ...interface{}) {}
26
27 func init() {
28         if arvadosclient.StringBool(os.Getenv("ARVADOS_DEBUG")) {
29                 DebugPrintf = log.Printf
30         }
31 }
32
// keepService is one entry of a keep service listing, populated by
// decoding JSON. Field order is significant to any positional
// composite literals, so new fields belong at the end.
type keepService struct {
	// Uuid is read from the "uuid" key.
	Uuid string `json:"uuid"`
	// Hostname is read from the "service_host" key.
	Hostname string `json:"service_host"`
	// Port is read from the "service_port" key.
	Port int `json:"service_port"`
	// SSL is read from the "service_ssl_flag" key.
	SSL bool `json:"service_ssl_flag"`
	// SvcType is read from the "service_type" key.
	SvcType string `json:"service_type"`
	// ReadOnly is read from the "read_only" key.
	ReadOnly bool `json:"read_only"`
}
41
// Md5String returns the MD5 digest of the bytes of s, rendered as a
// lowercase hexadecimal string.
func Md5String(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest[:])
}
46
47 type svcList struct {
48         Items []keepService `json:"items"`
49 }
50
// uploadStatus is the outcome of a single upload attempt, sent by
// uploadToKeepServer on its status channel. It is constructed with
// positional literals, so the field order must not change.
type uploadStatus struct {
	// err is nil on success, otherwise the request, read, or HTTP
	// status error.
	err error
	// url is the full request URL (service root + "/" + hash).
	url string
	// statusCode is the HTTP response status, or 0 when no
	// response was received.
	statusCode int
	// replicas_stored is the replica count reported by the server.
	replicas_stored int
	// response is the trimmed response body, or the HTTP status
	// text when an error response had an empty body.
	response string
}
58
59 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.Reader,
60         upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
61
62         var req *http.Request
63         var err error
64         var url = fmt.Sprintf("%s/%s", host, hash)
65         if req, err = http.NewRequest("PUT", url, nil); err != nil {
66                 DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
67                 upload_status <- uploadStatus{err, url, 0, 0, ""}
68                 return
69         }
70
71         req.ContentLength = expectedLength
72         if expectedLength > 0 {
73                 req.Body = ioutil.NopCloser(body)
74         } else {
75                 // "For client requests, a value of 0 means unknown if
76                 // Body is not nil."  In this case we do want the body
77                 // to be empty, so don't set req.Body.
78         }
79
80         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
81         req.Header.Add("Content-Type", "application/octet-stream")
82         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
83
84         var resp *http.Response
85         if resp, err = this.httpClient().Do(req); err != nil {
86                 DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error())
87                 upload_status <- uploadStatus{err, url, 0, 0, ""}
88                 return
89         }
90
91         rep := 1
92         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
93                 fmt.Sscanf(xr, "%d", &rep)
94         }
95
96         defer resp.Body.Close()
97         defer io.Copy(ioutil.Discard, resp.Body)
98
99         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
100         response := strings.TrimSpace(string(respbody))
101         if err2 != nil && err2 != io.EOF {
102                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
103                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
104         } else if resp.StatusCode == http.StatusOK {
105                 DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url)
106                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
107         } else {
108                 if resp.StatusCode >= 300 && response == "" {
109                         response = resp.Status
110                 }
111                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
112                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
113         }
114 }
115
116 func (this *KeepClient) putReplicas(
117         hash string,
118         getReader func() io.Reader,
119         expectedLength int64) (locator string, replicas int, err error) {
120
121         // Generate an arbitrary ID to identify this specific
122         // transaction in debug logs.
123         requestID := rand.Int31()
124
125         // Calculate the ordering for uploading to servers
126         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
127
128         // The next server to try contacting
129         next_server := 0
130
131         // The number of active writers
132         active := 0
133
134         // Used to communicate status from the upload goroutines
135         upload_status := make(chan uploadStatus)
136         defer func() {
137                 // Wait for any abandoned uploads (e.g., we started
138                 // two uploads and the first replied with replicas=2)
139                 // to finish before closing the status channel.
140                 go func() {
141                         for active > 0 {
142                                 <-upload_status
143                         }
144                         close(upload_status)
145                 }()
146         }()
147
148         replicasDone := 0
149         replicasTodo := this.Want_replicas
150
151         replicasPerThread := this.replicasPerService
152         if replicasPerThread < 1 {
153                 // unlimited or unknown
154                 replicasPerThread = replicasTodo
155         }
156
157         retriesRemaining := 1 + this.Retries
158         var retryServers []string
159
160         lastError := make(map[string]string)
161
162         for retriesRemaining > 0 {
163                 retriesRemaining -= 1
164                 next_server = 0
165                 retryServers = []string{}
166                 for replicasTodo > 0 {
167                         for active*replicasPerThread < replicasTodo {
168                                 // Start some upload requests
169                                 if next_server < len(sv) {
170                                         DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
171                                         go this.uploadToKeepServer(sv[next_server], hash, getReader(), upload_status, expectedLength, requestID)
172                                         next_server += 1
173                                         active += 1
174                                 } else {
175                                         if active == 0 && retriesRemaining == 0 {
176                                                 msg := "Could not write sufficient replicas: "
177                                                 for _, resp := range lastError {
178                                                         msg += resp + "; "
179                                                 }
180                                                 msg = msg[:len(msg)-2]
181                                                 return locator, replicasDone, InsufficientReplicasError(errors.New(msg))
182                                         } else {
183                                                 break
184                                         }
185                                 }
186                         }
187                         DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v",
188                                 requestID, replicasTodo, active)
189
190                         // Now wait for something to happen.
191                         if active > 0 {
192                                 status := <-upload_status
193                                 active -= 1
194
195                                 if status.statusCode == 200 {
196                                         // good news!
197                                         replicasDone += status.replicas_stored
198                                         replicasTodo -= status.replicas_stored
199                                         locator = status.response
200                                         delete(lastError, status.url)
201                                 } else {
202                                         msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response)
203                                         if len(msg) > 100 {
204                                                 msg = msg[:100]
205                                         }
206                                         lastError[status.url] = msg
207                                 }
208
209                                 if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
210                                         (status.statusCode >= 500 && status.statusCode != 503) {
211                                         // Timeout, too many requests, or other server side failure
212                                         // Do not retry when status code is 503, which means the keep server is full
213                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
214                                 }
215                         } else {
216                                 break
217                         }
218                 }
219
220                 sv = retryServers
221         }
222
223         return locator, replicasDone, nil
224 }