12447: Simplify BlockCache locking.
[arvados.git] / sdk / go / keepclient / support.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 package keepclient
6
7 import (
8         "crypto/md5"
9         "errors"
10         "fmt"
11         "io"
12         "io/ioutil"
13         "log"
14         "math/rand"
15         "net/http"
16         "os"
17         "strings"
18
19         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
20         "git.curoverse.com/arvados.git/sdk/go/streamer"
21 )
22
// DebugPrintf is the function keepclient uses to emit debug
// messages. It discards everything by default; the simplest way to
// see the messages in your application is to assign log.Printf to
// DebugPrintf.
var DebugPrintf = func(format string, args ...interface{}) {}
27
28 func init() {
29         if arvadosclient.StringBool(os.Getenv("ARVADOS_DEBUG")) {
30                 DebugPrintf = log.Printf
31         }
32 }
33
// keepService holds the attributes of a single Keep service record.
// Each field is mapped to the JSON key named in its struct tag.
// NOTE(review): presumably filled in from an API server's service
// listing -- confirm against the code that decodes svcList.
type keepService struct {
	Uuid     string `json:"uuid"`
	Hostname string `json:"service_host"`
	Port     int    `json:"service_port"`
	SSL      bool   `json:"service_ssl_flag"`
	SvcType  string `json:"service_type"`
	ReadOnly bool   `json:"read_only"`
}
42
// Md5String computes the MD5 digest of the bytes of s and returns it
// encoded as a lowercase hexadecimal string.
func Md5String(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest[:])
}
47
// svcList is a list of keepService records, mapped to the JSON key
// "items".
type svcList struct {
	Items []keepService `json:"items"`
}
51
// uploadStatus is the outcome of one upload attempt. Each call to
// uploadToKeepServer sends exactly one uploadStatus on its status
// channel, and putReplicas consumes them.
type uploadStatus struct {
	// err is nil only when the server answered 200 OK and the
	// response body was read successfully.
	err error
	// url is the request URL, formed as "<host>/<hash>".
	url string
	// statusCode is the HTTP status code, or 0 when no response was
	// received (request creation or transport failure).
	statusCode int
	// replicas_stored is taken from the X_Keep_Replicas_Stored
	// response header, defaulting to 1 when the header is absent;
	// it is 0 when no response was received.
	replicas_stored int
	// response is the whitespace-trimmed response body (at most the
	// first 4096 bytes), or the HTTP status text when the body is
	// empty and the status code is >= 300.
	response string
}
59
60 func (this *KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
61         upload_status chan<- uploadStatus, expectedLength int64, requestID int32) {
62
63         var req *http.Request
64         var err error
65         var url = fmt.Sprintf("%s/%s", host, hash)
66         if req, err = http.NewRequest("PUT", url, nil); err != nil {
67                 DebugPrintf("DEBUG: [%08x] Error creating request PUT %v error: %v", requestID, url, err.Error())
68                 upload_status <- uploadStatus{err, url, 0, 0, ""}
69                 body.Close()
70                 return
71         }
72
73         req.ContentLength = expectedLength
74         if expectedLength > 0 {
75                 // Do() will close the body ReadCloser when it is done
76                 // with it.
77                 req.Body = body
78         } else {
79                 // "For client requests, a value of 0 means unknown if Body is
80                 // not nil."  In this case we do want the body to be empty, so
81                 // don't set req.Body.  However, we still need to close the
82                 // body ReadCloser.
83                 body.Close()
84         }
85
86         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
87         req.Header.Add("Content-Type", "application/octet-stream")
88         req.Header.Add(X_Keep_Desired_Replicas, fmt.Sprint(this.Want_replicas))
89
90         var resp *http.Response
91         if resp, err = this.httpClient().Do(req); err != nil {
92                 DebugPrintf("DEBUG: [%08x] Upload failed %v error: %v", requestID, url, err.Error())
93                 upload_status <- uploadStatus{err, url, 0, 0, ""}
94                 return
95         }
96
97         rep := 1
98         if xr := resp.Header.Get(X_Keep_Replicas_Stored); xr != "" {
99                 fmt.Sscanf(xr, "%d", &rep)
100         }
101
102         defer resp.Body.Close()
103         defer io.Copy(ioutil.Discard, resp.Body)
104
105         respbody, err2 := ioutil.ReadAll(&io.LimitedReader{R: resp.Body, N: 4096})
106         response := strings.TrimSpace(string(respbody))
107         if err2 != nil && err2 != io.EOF {
108                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, err2.Error(), response)
109                 upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
110         } else if resp.StatusCode == http.StatusOK {
111                 DebugPrintf("DEBUG: [%08x] Upload %v success", requestID, url)
112                 upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
113         } else {
114                 if resp.StatusCode >= 300 && response == "" {
115                         response = resp.Status
116                 }
117                 DebugPrintf("DEBUG: [%08x] Upload %v error: %v response: %v", requestID, url, resp.StatusCode, response)
118                 upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
119         }
120 }
121
122 func (this *KeepClient) putReplicas(
123         hash string,
124         tr *streamer.AsyncStream,
125         expectedLength int64) (locator string, replicas int, err error) {
126
127         // Generate an arbitrary ID to identify this specific
128         // transaction in debug logs.
129         requestID := rand.Int31()
130
131         // Calculate the ordering for uploading to servers
132         sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
133
134         // The next server to try contacting
135         next_server := 0
136
137         // The number of active writers
138         active := 0
139
140         // Used to communicate status from the upload goroutines
141         upload_status := make(chan uploadStatus)
142         defer func() {
143                 // Wait for any abandoned uploads (e.g., we started
144                 // two uploads and the first replied with replicas=2)
145                 // to finish before closing the status channel.
146                 go func() {
147                         for active > 0 {
148                                 <-upload_status
149                         }
150                         close(upload_status)
151                 }()
152         }()
153
154         replicasDone := 0
155         replicasTodo := this.Want_replicas
156
157         replicasPerThread := this.replicasPerService
158         if replicasPerThread < 1 {
159                 // unlimited or unknown
160                 replicasPerThread = replicasTodo
161         }
162
163         retriesRemaining := 1 + this.Retries
164         var retryServers []string
165
166         lastError := make(map[string]string)
167
168         for retriesRemaining > 0 {
169                 retriesRemaining -= 1
170                 next_server = 0
171                 retryServers = []string{}
172                 for replicasTodo > 0 {
173                         for active*replicasPerThread < replicasTodo {
174                                 // Start some upload requests
175                                 if next_server < len(sv) {
176                                         DebugPrintf("DEBUG: [%08x] Begin upload %s to %s", requestID, hash, sv[next_server])
177                                         go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestID)
178                                         next_server += 1
179                                         active += 1
180                                 } else {
181                                         if active == 0 && retriesRemaining == 0 {
182                                                 msg := "Could not write sufficient replicas: "
183                                                 for _, resp := range lastError {
184                                                         msg += resp + "; "
185                                                 }
186                                                 msg = msg[:len(msg)-2]
187                                                 return locator, replicasDone, InsufficientReplicasError(errors.New(msg))
188                                         } else {
189                                                 break
190                                         }
191                                 }
192                         }
193                         DebugPrintf("DEBUG: [%08x] Replicas remaining to write: %v active uploads: %v",
194                                 requestID, replicasTodo, active)
195
196                         // Now wait for something to happen.
197                         if active > 0 {
198                                 status := <-upload_status
199                                 active -= 1
200
201                                 if status.statusCode == 200 {
202                                         // good news!
203                                         replicasDone += status.replicas_stored
204                                         replicasTodo -= status.replicas_stored
205                                         locator = status.response
206                                         delete(lastError, status.url)
207                                 } else {
208                                         msg := fmt.Sprintf("[%d] %s", status.statusCode, status.response)
209                                         if len(msg) > 100 {
210                                                 msg = msg[:100]
211                                         }
212                                         lastError[status.url] = msg
213                                 }
214
215                                 if status.statusCode == 0 || status.statusCode == 408 || status.statusCode == 429 ||
216                                         (status.statusCode >= 500 && status.statusCode != 503) {
217                                         // Timeout, too many requests, or other server side failure
218                                         // Do not retry when status code is 503, which means the keep server is full
219                                         retryServers = append(retryServers, status.url[0:strings.LastIndex(status.url, "/")])
220                                 }
221                         } else {
222                                 break
223                         }
224                 }
225
226                 sv = retryServers
227         }
228
229         return locator, replicasDone, nil
230 }