9701: Merge branch '9463-change-arvput-use-collection-class' into 9701-collection...
[arvados.git] / sdk / go / keepclient / support.go
1 package keepclient
2
3 import (
4         "crypto/md5"
5         "errors"
6         "fmt"
7         "git.curoverse.com/arvados.git/sdk/go/streamer"
8         "io"
9         "io/ioutil"
10         "math/rand"
11         "net"
12         "net/http"
13         "strings"
14         "time"
15 )
16
// DebugPrintf is the hook keepclient calls to emit debug messages. It
// defaults to a no-op; the simplest way to turn on keepclient debug
// output in an application is to assign log.Printf to DebugPrintf.
var DebugPrintf = func(format string, args ...interface{}) {}
21
// keepService is one Keep service record. Each field is decoded from the
// JSON key named in its struct tag.
type keepService struct {
	// Uuid holds the record's "uuid" value.
	Uuid string `json:"uuid"`

	// Hostname holds the "service_host" value.
	Hostname string `json:"service_host"`

	// Port holds the "service_port" value.
	Port int `json:"service_port"`

	// SSL holds the "service_ssl_flag" value.
	SSL bool `json:"service_ssl_flag"`

	// SvcType holds the "service_type" value.
	SvcType string `json:"service_type"`

	// ReadOnly holds the "read_only" value.
	ReadOnly bool `json:"read_only"`
}
30
// Md5String returns the MD5 digest of the bytes of s, rendered as a
// lowercase hexadecimal string.
func Md5String(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest[:])
}
35
36 // Set timeouts applicable when connecting to non-disk services
37 // (assumed to be over the Internet).
38 func (this *KeepClient) setClientSettingsNonDisk() {
39         if this.Client.Timeout == 0 {
40                 // Maximum time to wait for a complete response
41                 this.Client.Timeout = 300 * time.Second
42
43                 // TCP and TLS connection settings
44                 this.Client.Transport = &http.Transport{
45                         Dial: (&net.Dialer{
46                                 // The maximum time to wait to set up
47                                 // the initial TCP connection.
48                                 Timeout: 30 * time.Second,
49
50                                 // The TCP keep alive heartbeat
51                                 // interval.
52                                 KeepAlive: 120 * time.Second,
53                         }).Dial,
54
55                         TLSHandshakeTimeout: 10 * time.Second,
56                 }
57         }
58 }
59
60 // Set timeouts applicable when connecting to keepstore services directly
61 // (assumed to be on the local network).
62 func (this *KeepClient) setClientSettingsDisk() {
63         if this.Client.Timeout == 0 {
64                 // Maximum time to wait for a complete response
65                 this.Client.Timeout = 20 * time.Second
66
67                 // TCP and TLS connection timeouts
68                 this.Client.Transport = &http.Transport{
69                         Dial: (&net.Dialer{
70                                 // The maximum time to wait to set up
71                                 // the initial TCP connection.
72                                 Timeout: 2 * time.Second,
73
74                                 // The TCP keep alive heartbeat
75                                 // interval.
76                                 KeepAlive: 180 * time.Second,
77                         }).Dial,
78
79                         TLSHandshakeTimeout: 4 * time.Second,
80                 }
81         }
82 }
83
84 type svcList struct {
85         Items []keepService `json:"items"`
86 }
87
// uploadStatus is the result of one upload attempt, sent by
// uploadToKeepServer on its status channel. The field order matters:
// values are built with positional composite literals.
type uploadStatus struct {
	// err is nil on success, otherwise the failure that occurred.
	err error

	// url is the full URL the PUT request was addressed to.
	url string

	// statusCode is the HTTP status of the response, or 0 when no
	// response was obtained.
	statusCode int

	// replicas_stored is the replica count reported by the server;
	// uploadToKeepServer defaults it to 1 when the response header
	// is absent.
	replicas_stored int

	// response is the whitespace-trimmed response body.
	response string
}
95
96 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
97         upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
98
99         var req *http.Request
100         var err error
101         var url = fmt.Sprintf("%s/%s", host, hash)
102         if req, err = http.NewRequest("PUT", url, nil); err != nil {
103                 DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
104                 upload_status <- uploadStatus{err, url, 0, 0, ""}
105                 body.Close()
106                 return
107         }
108
109         req.ContentLength = expectedLength
110         if expectedLength > 0 {
111                 // http.Client.Do will close the body ReadCloser when it is
112                 // done with it.
113                 req.Body = body
114         } else {
115                 // "For client requests, a value of 0 means unknown if Body is
116                 // not nil."  In this case we do want the body to be empty, so
117                 // don't set req.Body.  However, we still need to close the
118                 // body ReadCloser.
119                 body.Close()
120         }
121
122         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
123         req.Header.Add("Content-Type", "application/octet-stream")
124         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
125
126         var resp *http.Response
127         if resp, err = this.Client.Do(req); err != nil {
128                 DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error())
129                 upload_status <- uploadStatus{err, url, 0, 0, ""}
130                 return
131         }
132
133         rep := 1
134         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
135                 fmt.Sscanf(xr, "%d", &rep)
136         }
137
138         defer resp.Body.Close()
139         defer io.Copy(ioutil.Discard, resp.Body)
140
141         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
142         response := strings.TrimSpace(string(respbody))
143         if err2 != nil && err2 != io.EOF {
144                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
145                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
146         } else if resp.StatusCode == http.StatusOK {
147                 DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url)
148                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
149         } else {
150                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
151                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
152         }
153 }
154
155 func (this *KeepClient) putReplicas(
156         hash string,
157         tr *streamer.AsyncStream,
158         expectedLength int64) (locator string, replicas int, err error) {
159
160         // Generate an arbitrary ID to identify this specific
161         // transaction in debug logs.
162         requestID := rand.Int31()
163
164         // Calculate the ordering for uploading to servers
165         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
166
167         // The next server to try contacting
168         next_server := 0
169
170         // The number of active writers
171         active := 0
172
173         // Used to communicate status from the upload goroutines
174         upload_status := make(chan uploadStatus)
175         defer func() {
176                 // Wait for any abandoned uploads (e.g., we started
177                 // two uploads and the first replied with replicas=2)
178                 // to finish before closing the status channel.
179                 go func() {
180                         for active > 0 {
181                                 <-upload_status
182                         }
183                         close(upload_status)
184                 }()
185         }()
186
187         replicasDone := 0
188         replicasTodo := this.Want_replicas
189
190         replicasPerThread := this.replicasPerService
191         if replicasPerThread < 1 {
192                 // unlimited or unknown
193                 replicasPerThread = replicasTodo
194         }
195
196         retriesRemaining := 1 + this.Retries
197         var retryServers []string
198
199         for retriesRemaining > 0 {
200                 retriesRemaining -= 1
201                 next_server = 0
202                 retryServers = []string{}
203                 for replicasTodo > 0 {
204                         for active*replicasPerThread < replicasTodo {
205                                 // Start some upload requests
206                                 if next_server < len(sv) {
207                                         DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
208                                         go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
209                                         next_server += 1
210                                         active += 1
211                                 } else {
212                                         if active == 0 && retriesRemaining == 0 {
213                                                 return locator, replicasDone, InsufficientReplicasError
214                                         } else {
215                                                 break
216                                         }
217                                 }
218                         }
219                         DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v",
220                                 requestID, replicasTodo, active)
221
222                         // Now wait for something to happen.
223                         if active > 0 {
224                                 status := <-upload_status
225                                 active -= 1
226
227                                 if status.statusCode == 200 {
228                                         // good news!
229                                         replicasDone += status.replicas_stored
230                                         replicasTodo -= status.replicas_stored
231                                         locator = status.response
232                                 } else if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
233                                         (status.statusCode >= 500 && status.statusCode != 503) {
234                                         // Timeout, too many requests, or other server side failure
235                                         // Do not retry when status code is 503, which means the keep server is full
236                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
237                                 }
238                         } else {
239                                 break
240                         }
241                 }
242
243                 sv = retryServers
244         }
245
246         return locator, replicasDone, nil
247 }