0e74892a40951af958ba19137930d4ca63121b0a
[arvados.git] / sdk / go / keepclient / support.go
1 package keepclient
2
3 import (
4         "crypto/md5"
5         "errors"
6         "fmt"
7         "io"
8         "io/ioutil"
9         "log"
10         "math/rand"
11         "net/http"
12         "os"
13         "regexp"
14         "strings"
15
16         "git.curoverse.com/arvados.git/sdk/go/streamer"
17 )
18
// DebugPrintf is the hook keepclient calls to emit debug messages.
// It defaults to a no-op that discards everything; the easiest way
// to enable keepclient debug output in your application is to assign
// log.Printf (or any other Printf-style function) to DebugPrintf.
var DebugPrintf = func(format string, args ...interface{}) {}
23
24 func init() {
25         var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
26         if matchTrue.MatchString(os.Getenv("ARVADOS_DEBUG")) {
27                 DebugPrintf = log.Printf
28         }
29 }
30
// keepService holds the attributes of one Keep service record as
// decoded from JSON; each field's tag names the JSON key it is read
// from.
type keepService struct {
	Uuid     string `json:"uuid"`             // identifier of the service record
	Hostname string `json:"service_host"`     // host part of the service address
	Port     int    `json:"service_port"`     // port part of the service address
	SSL      bool   `json:"service_ssl_flag"` // presumably: connect using TLS when true (TODO confirm against caller)
	SvcType  string `json:"service_type"`     // kind of service; the set of valid values is not visible here
	ReadOnly bool   `json:"read_only"`        // presumably: service must not be written to when true (TODO confirm)
}
39
// Md5String computes the MD5 digest of the bytes of s and returns it
// as a 32-character lowercase hexadecimal string.
func Md5String(s string) string {
	hasher := md5.New()
	// hash.Hash.Write is documented never to return an error.
	hasher.Write([]byte(s))
	return fmt.Sprintf("%x", hasher.Sum(nil))
}
44
// svcList is the JSON envelope for a list of Keep services: the
// individual keepService records are read from its "items" key.
type svcList struct {
	Items []keepService `json:"items"`
}
48
// uploadStatus is the result of one upload attempt, sent by
// uploadToKeepServer on its status channel and consumed by
// putReplicas.
//
// The struct is constructed with positional literals, so the field
// order below must not change.
type uploadStatus struct {
	err             error  // nil when the server answered 200 and the response was read; otherwise the failure
	url             string // request URL, formed as host + "/" + hash
	statusCode      int    // HTTP status code, or 0 when no response was obtained
	replicas_stored int    // value parsed from the X_Keep_Replicas_Stored response header; 1 if the header is absent
	response        string // whitespace-trimmed response body (at most 4096 bytes read), or the HTTP status text for an empty error response
}
56
57 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
58         upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
59
60         var req *http.Request
61         var err error
62         var url = fmt.Sprintf("%s/%s", host, hash)
63         if req, err = http.NewRequest("PUT", url, nil); err != nil {
64                 DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
65                 upload_status <- uploadStatus{err, url, 0, 0, ""}
66                 body.Close()
67                 return
68         }
69
70         req.ContentLength = expectedLength
71         if expectedLength > 0 {
72                 // Do() will close the body ReadCloser when it is done
73                 // with it.
74                 req.Body = body
75         } else {
76                 // "For client requests, a value of 0 means unknown if Body is
77                 // not nil."  In this case we do want the body to be empty, so
78                 // don't set req.Body.  However, we still need to close the
79                 // body ReadCloser.
80                 body.Close()
81         }
82
83         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
84         req.Header.Add("Content-Type", "application/octet-stream")
85         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
86
87         var resp *http.Response
88         if resp, err = this.httpClient().Do(req); err != nil {
89                 DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error())
90                 upload_status <- uploadStatus{err, url, 0, 0, ""}
91                 return
92         }
93
94         rep := 1
95         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
96                 fmt.Sscanf(xr, "%d", &rep)
97         }
98
99         defer resp.Body.Close()
100         defer io.Copy(ioutil.Discard, resp.Body)
101
102         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
103         response := strings.TrimSpace(string(respbody))
104         if err2 != nil && err2 != io.EOF {
105                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
106                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
107         } else if resp.StatusCode == http.StatusOK {
108                 DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url)
109                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
110         } else {
111                 if resp.StatusCode >= 300 && response == "" {
112                         response = resp.Status
113                 }
114                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
115                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
116         }
117 }
118
119 func (this *KeepClient) putReplicas(
120         hash string,
121         tr *streamer.AsyncStream,
122         expectedLength int64) (locator string, replicas int, err error) {
123
124         // Generate an arbitrary ID to identify this specific
125         // transaction in debug logs.
126         requestID := rand.Int31()
127
128         // Calculate the ordering for uploading to servers
129         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
130
131         // The next server to try contacting
132         next_server := 0
133
134         // The number of active writers
135         active := 0
136
137         // Used to communicate status from the upload goroutines
138         upload_status := make(chan uploadStatus)
139         defer func() {
140                 // Wait for any abandoned uploads (e.g., we started
141                 // two uploads and the first replied with replicas=2)
142                 // to finish before closing the status channel.
143                 go func() {
144                         for active > 0 {
145                                 <-upload_status
146                         }
147                         close(upload_status)
148                 }()
149         }()
150
151         replicasDone := 0
152         replicasTodo := this.Want_replicas
153
154         replicasPerThread := this.replicasPerService
155         if replicasPerThread < 1 {
156                 // unlimited or unknown
157                 replicasPerThread = replicasTodo
158         }
159
160         retriesRemaining := 1 + this.Retries
161         var retryServers []string
162
163         lastError := make(map[string]string)
164
165         for retriesRemaining > 0 {
166                 retriesRemaining -= 1
167                 next_server = 0
168                 retryServers = []string{}
169                 for replicasTodo > 0 {
170                         for active*replicasPerThread < replicasTodo {
171                                 // Start some upload requests
172                                 if next_server < len(sv) {
173                                         DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
174                                         go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
175                                         next_server += 1
176                                         active += 1
177                                 } else {
178                                         if active == 0 && retriesRemaining == 0 {
179                                                 msg := "Could not write sufficient replicas: "
180                                                 for _, resp := range lastError {
181                                                         msg += resp + "; "
182                                                 }
183                                                 msg = msg[:len(msg)-2]
184                                                 return locator, replicasDone, InsufficientReplicasError(errors.New(msg))
185                                         } else {
186                                                 break
187                                         }
188                                 }
189                         }
190                         DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v",
191                                 requestID, replicasTodo, active)
192
193                         // Now wait for something to happen.
194                         if active > 0 {
195                                 status := <-upload_status
196                                 active -= 1
197
198                                 if status.statusCode == 200 {
199                                         // good news!
200                                         replicasDone += status.replicas_stored
201                                         replicasTodo -= status.replicas_stored
202                                         locator = status.response
203                                         delete(lastError, status.url)
204                                 } else {
205                                         msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response)
206                                         if len(msg) > 100 {
207                                                 msg = msg[:100]
208                                         }
209                                         lastError[status.url] = msg
210                                 }
211
212                                 if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
213                                         (status.statusCode >= 500 && status.statusCode != 503) {
214                                         // Timeout, too many requests, or other server side failure
215                                         // Do not retry when status code is 503, which means the keep server is full
216                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
217                                 }
218                         } else {
219                                 break
220                         }
221                 }
222
223                 sv = retryServers
224         }
225
226         return locator, replicasDone, nil
227 }