1885: Renamed DiscoverKeepDisks to DiscoverKeepServers, moved error.New()
[arvados.git] / sdk / go / src / arvados.org / keepclient / keepclient.go
1 package keepclient
2
3 import (
4         "crypto/md5"
5         "crypto/tls"
6         "encoding/json"
7         "errors"
8         "fmt"
9         "io"
10         "io/ioutil"
11         "log"
12         "net/http"
13         "os"
14         "sort"
15         "strconv"
16 )
17
// BLOCKSIZE is the size of a Keep "block" in bytes: 64 MiB.
const BLOCKSIZE = 64 << 20
20
// Sentinel errors returned by KeepClient operations.
var (
	// BlockNotFound is returned by the Get/Ask family when no Keep server
	// answered the request with 200 OK.
	BlockNotFound = errors.New("Block not found")

	// InsufficientReplicasError is returned by the Put family when the
	// available Keep servers were exhausted before the wanted number of
	// replicas had been written.
	InsufficientReplicasError = errors.New("Could not write sufficient replicas")
)
23
// KeepClient holds the API server credentials, the list of known Keep
// servers and the HTTP client used by the methods in this file.
type KeepClient struct {
	// API server host; DiscoverKeepServers queries
	// https://<ApiServer>/arvados/v1/keep_disks.
	ApiServer     string
	// Token sent as "Authorization: OAuth2 <token>" with each request.
	ApiToken      string
	// Set by MakeKeepClient when ARVADOS_API_HOST_INSECURE is non-empty.
	ApiInsecure   bool
	// Base URLs ("http[s]://host:port") of the Keep servers, deduplicated
	// and sorted by DiscoverKeepServers.
	Service_roots []string
	// Number of successful uploads a Put aims for (MakeKeepClient sets 2).
	Want_replicas int
	// HTTP client used for every request made through this KeepClient.
	Client        *http.Client
}
32
// KeepDisk is one entry of the "items" array in the API server's keep_disks
// response, as decoded by DiscoverKeepServers.
type KeepDisk struct {
	Hostname string `json:"service_host"`     // host name of the Keep server
	Port     int    `json:"service_port"`     // port the Keep server listens on
	SSL      bool   `json:"service_ssl_flag"` // true selects "https", false "http"
}
38
39 func MakeKeepClient() (kc KeepClient, err error) {
40         tr := &http.Transport{
41                 TLSClientConfig: &tls.Config{InsecureSkipVerify: kc.ApiInsecure},
42         }
43
44         kc = KeepClient{
45                 ApiServer:     os.Getenv("ARVADOS_API_HOST"),
46                 ApiToken:      os.Getenv("ARVADOS_API_TOKEN"),
47                 ApiInsecure:   (os.Getenv("ARVADOS_API_HOST_INSECURE") != ""),
48                 Want_replicas: 2,
49                 Client:        &http.Client{Transport: tr}}
50
51         err = (&kc).DiscoverKeepServers()
52
53         return kc, err
54 }
55
56 func (this *KeepClient) DiscoverKeepServers() error {
57         // Construct request of keep disk list
58         var req *http.Request
59         var err error
60         if req, err = http.NewRequest("GET", fmt.Sprintf("https://%s/arvados/v1/keep_disks", this.ApiServer), nil); err != nil {
61                 return err
62         }
63
64         // Add api token header
65         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.ApiToken))
66
67         // Make the request
68         var resp *http.Response
69         if resp, err = this.Client.Do(req); err != nil {
70                 return err
71         }
72
73         type SvcList struct {
74                 Items []KeepDisk `json:"items"`
75         }
76
77         // Decode json reply
78         dec := json.NewDecoder(resp.Body)
79         var m SvcList
80         if err := dec.Decode(&m); err != nil {
81                 return err
82         }
83
84         listed := make(map[string]bool)
85         this.Service_roots = make([]string, 0, len(m.Items))
86
87         for _, element := range m.Items {
88                 n := ""
89                 if element.SSL {
90                         n = "s"
91                 }
92
93                 // Construct server URL
94                 url := fmt.Sprintf("http%s://%s:%d", n, element.Hostname, element.Port)
95
96                 // Skip duplicates
97                 if !listed[url] {
98                         listed[url] = true
99                         this.Service_roots = append(this.Service_roots, url)
100                 }
101         }
102
103         // Must be sorted for ShuffledServiceRoots() to produce consistent
104         // results.
105         sort.Strings(this.Service_roots)
106
107         return nil
108 }
109
110 func (this KeepClient) ShuffledServiceRoots(hash string) (pseq []string) {
111         // Build an ordering with which to query the Keep servers based on the
112         // contents of the hash.  "hash" is a hex-encoded number at least 8
113         // digits (32 bits) long
114
115         // seed used to calculate the next keep server from 'pool' to be added
116         // to 'pseq'
117         seed := hash
118
119         // Keep servers still to be added to the ordering
120         pool := make([]string, len(this.Service_roots))
121         copy(pool, this.Service_roots)
122
123         // output probe sequence
124         pseq = make([]string, 0, len(this.Service_roots))
125
126         // iterate while there are servers left to be assigned
127         for len(pool) > 0 {
128
129                 if len(seed) < 8 {
130                         // ran out of digits in the seed
131                         if len(pseq) < (len(hash) / 4) {
132                                 // the number of servers added to the probe
133                                 // sequence is less than the number of 4-digit
134                                 // slices in 'hash' so refill the seed with the
135                                 // last 4 digits.
136                                 seed = hash[len(hash)-4:]
137                         }
138                         seed += hash
139                 }
140
141                 // Take the next 8 digits (32 bytes) and interpret as an integer,
142                 // then modulus with the size of the remaining pool to get the next
143                 // selected server.
144                 probe, _ := strconv.ParseUint(seed[0:8], 16, 32)
145                 probe %= uint64(len(pool))
146
147                 // Append the selected server to the probe sequence and remove it
148                 // from the pool.
149                 pseq = append(pseq, pool[probe])
150                 pool = append(pool[:probe], pool[probe+1:]...)
151
152                 // Remove the digits just used from the seed
153                 seed = seed[8:]
154         }
155         return pseq
156 }
157
// ReaderSlice is one progress report from ReadIntoBuffer: either a slice of
// the buffer that was just filled (reader_error is nil) or the error that
// ended the read (slice is nil).
type ReaderSlice struct {
	slice        []byte
	reader_error error
}

// ReadIntoBuffer reads repeatedly from 'r' into 'buffer' and reports each
// chunk read on 'slices'.  It finishes by sending one ReaderSlice carrying
// the reader's error (io.EOF at the normal end of input, or io.ErrShortBuffer
// if 'r' still has data once 'buffer' is full) and then closes 'slices'.
//
// Bytes returned together with an error are reported before that error, as
// the io.Reader contract requires.
func ReadIntoBuffer(buffer []byte, r io.Reader, slices chan<- ReaderSlice) {
	defer close(slices)

	// Initially use entire buffer as scratch space
	ptr := buffer[:]
	for {
		if len(ptr) == 0 {
			// Ran out of scratch space, try reading one more byte
			var b [1]byte
			n, err := r.Read(b[:])

			if n > 0 {
				// Reader has more data but we have nowhere to
				// put it, so we're stuffed
				slices <- ReaderSlice{nil, io.ErrShortBuffer}
			} else {
				// Return some other error (hopefully EOF)
				slices <- ReaderSlice{nil, err}
			}
			return
		}

		// Read into the scratch space
		n, err := r.Read(ptr)

		if n > 0 {
			// Report the bytes just read before looking at the
			// error: a reader may return data and an error
			// (typically EOF) from the same call.
			slices <- ReaderSlice{ptr[:n], nil}

			// Adjust the scratch space slice
			ptr = ptr[n:]
		}

		// End on error (includes EOF)
		if err != nil {
			slices <- ReaderSlice{nil, err}
			return
		}
	}
}
208
// A read request to the Transfer() function
type ReadRequest struct {
	offset  int               // position in the buffered data to start reading at
	maxsize int               // maximum number of bytes wanted
	result  chan<- ReadResult // channel the answer to this request is sent on
}
215
// A read result from the Transfer() function.  HandleReadRequest fills in
// either 'slice' (with a nil 'err') or 'err' (io.EOF, with a nil 'slice').
type ReadResult struct {
	slice []byte
	err   error
}
221
// Reads from the buffer managed by the Transfer()
type BufferReader struct {
	// Current read position.  A pointer because the methods use value
	// receivers and still need to advance the position.
	offset    *int
	// Channel on which read requests are submitted.
	requests  chan<- ReadRequest
	// Channel on which the answers to this reader's requests arrive.
	responses chan ReadResult
}
228
229 func MakeBufferReader(requests chan<- ReadRequest) BufferReader {
230         return BufferReader{new(int), requests, make(chan ReadResult)}
231 }
232
233 // Reads from the buffer managed by the Transfer()
234 func (this BufferReader) Read(p []byte) (n int, err error) {
235         this.requests <- ReadRequest{*this.offset, len(p), this.responses}
236         rr, valid := <-this.responses
237         if valid {
238                 *this.offset += len(rr.slice)
239                 return copy(p, rr.slice), rr.err
240         } else {
241                 return 0, io.ErrUnexpectedEOF
242         }
243 }
244
245 func (this BufferReader) WriteTo(dest io.Writer) (written int64, err error) {
246         // Record starting offset in order to correctly report the number of bytes sent
247         starting_offset := *this.offset
248         for {
249                 this.requests <- ReadRequest{*this.offset, 32 * 1024, this.responses}
250                 rr, valid := <-this.responses
251                 if valid {
252                         log.Printf("WriteTo slice %v %d %v", *this.offset, len(rr.slice), rr.err)
253                         *this.offset += len(rr.slice)
254                         if rr.err != nil {
255                                 if rr.err == io.EOF {
256                                         // EOF is not an error.
257                                         return int64(*this.offset - starting_offset), nil
258                                 } else {
259                                         return int64(*this.offset - starting_offset), rr.err
260                                 }
261                         } else {
262                                 dest.Write(rr.slice)
263                         }
264                 } else {
265                         return int64(*this.offset), io.ErrUnexpectedEOF
266                 }
267         }
268 }
269
// Close closes this reader's responses channel and always returns nil.  Once
// closed, Read and WriteTo on this reader report io.ErrUnexpectedEOF.
// NOTE(review): the channel is closed from the receiving side; a
// HandleReadRequest send still in flight for this reader would panic —
// confirm Close is only called once no request is outstanding.
func (this BufferReader) Close() error {
	close(this.responses)
	return nil
}
275
276 // Handle a read request.  Returns true if a response was sent, and false if
277 // the request should be queued.
278 func HandleReadRequest(req ReadRequest, body []byte, complete bool) bool {
279         log.Printf("HandleReadRequest %d %d %d", req.offset, req.maxsize, len(body))
280         if req.offset < len(body) {
281                 var end int
282                 if req.offset+req.maxsize < len(body) {
283                         end = req.offset + req.maxsize
284                 } else {
285                         end = len(body)
286                 }
287                 req.result <- ReadResult{body[req.offset:end], nil}
288                 return true
289         } else if complete && req.offset >= len(body) {
290                 req.result <- ReadResult{nil, io.EOF}
291                 return true
292         } else {
293                 return false
294         }
295 }
296
// Transfer serves read requests against a byte buffer, optionally while that
// buffer is still being filled.
//
// If 'source_reader' is not nil, reads data from 'source_reader' and stores it
// in the provided buffer (via a ReadIntoBuffer goroutine).  Otherwise, use the
// contents of 'source_buffer' as is and treat them as complete.  Accepts read
// requests on the buffer on the 'requests' channel; requests that cannot be
// answered yet are queued and retried whenever the reader reports progress.
// Errors from the reader, including io.EOF, are forwarded on 'reader_error'.
//
// Returns when the 'requests' channel is closed, when the reader reports an
// error other than io.EOF, or when the slices channel is closed before EOF
// was seen (reported as io.ErrUnexpectedEOF).
// NOTE(review): each send on 'reader_error' happens inside the select loop,
// so unless the caller made that channel buffered, no read request is served
// until the status has been received — confirm callers always listen.
func Transfer(source_buffer []byte, source_reader io.Reader, requests <-chan ReadRequest, reader_error chan error) {
	// currently buffered data
	var body []byte

	// for receiving slices from ReadIntoBuffer; stays nil (and therefore
	// never ready in the select below) when there is no reader
	var slices chan ReaderSlice = nil

	// indicates whether the buffered data is complete
	var complete bool = false

	if source_reader != nil {
		// 'body' is the buffer slice representing the body content read so far
		body = source_buffer[:0]

		// used to communicate slices of the buffer as they are filled;
		// ReadIntoBuffer will close 'slices' when it is done with it
		slices = make(chan ReaderSlice)

		// Spin it off
		go ReadIntoBuffer(source_buffer, source_reader, slices)
	} else {
		// use the whole buffer
		body = source_buffer[:]

		// buffer is complete
		complete = true
	}

	// requests that could not be answered yet
	pending_requests := make([]ReadRequest, 0)

	for {
		select {
		case req, valid := <-requests:
			// Handle a buffer read request
			if valid {
				if !HandleReadRequest(req, body, complete) {
					pending_requests = append(pending_requests, req)
				}
			} else {
				// closed 'requests' channel indicates we're done
				return
			}

		case bk, valid := <-slices:
			// Got a new slice from the reader
			if valid {
				if bk.reader_error != nil {
					reader_error <- bk.reader_error
					if bk.reader_error == io.EOF {
						// EOF indicates the reader is done
						// sending, so our buffer is complete.
						complete = true
					} else {
						// some other reader error; requests still
						// pending at this point are left unanswered
						return
					}
				}

				if bk.slice != nil {
					// adjust body bounds now that another slice has been read
					body = source_buffer[0 : len(body)+len(bk.slice)]
				}

				// handle pending reads
				n := 0
				for n < len(pending_requests) {
					if HandleReadRequest(pending_requests[n], body, complete) {

						// move the element from the
						// back of the slice to
						// position 'n', then shorten
						// the slice by one element
						// (the moved element is
						// examined next, since 'n' is
						// not advanced)
						pending_requests[n] = pending_requests[len(pending_requests)-1]
						pending_requests = pending_requests[0 : len(pending_requests)-1]
					} else {

						// Request wasn't handled, so keep it in the request slice
						n += 1
					}
				}
			} else {
				if complete {
					// no more reads; a nil channel is never
					// selected again
					slices = nil
				} else {
					// reader channel closed without signaling EOF
					reader_error <- io.ErrUnexpectedEOF
					return
				}
			}
		}
	}
}
394
// UploadStatus is the outcome of one upload attempt, as reported by
// uploadToKeepServer.
type UploadStatus struct {
	Err        error  // nil when the server answered 200 OK
	Url        string // URL the block was PUT to
	StatusCode int    // HTTP status code; 0 if no response was received
}
400
401 func (this KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
402         upload_status chan<- UploadStatus, expectedLength int64) {
403
404         log.Printf("Uploading to %s", host)
405
406         var req *http.Request
407         var err error
408         var url = fmt.Sprintf("%s/%s", host, hash)
409         if req, err = http.NewRequest("PUT", url, nil); err != nil {
410                 upload_status <- UploadStatus{err, url, 0}
411                 return
412         }
413
414         if expectedLength > 0 {
415                 req.ContentLength = expectedLength
416         }
417
418         req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.ApiToken))
419         req.Header.Add("Content-Type", "application/octet-stream")
420         req.Body = body
421
422         var resp *http.Response
423         if resp, err = this.Client.Do(req); err != nil {
424                 upload_status <- UploadStatus{err, url, 0}
425                 return
426         }
427
428         if resp.StatusCode == http.StatusOK {
429                 upload_status <- UploadStatus{nil, url, resp.StatusCode}
430         } else {
431                 upload_status <- UploadStatus{errors.New(resp.Status), url, resp.StatusCode}
432         }
433 }
434
435 func (this KeepClient) putReplicas(
436         hash string,
437         requests chan ReadRequest,
438         reader_status chan error,
439         expectedLength int64) (replicas int, err error) {
440
441         // Calculate the ordering for uploading to servers
442         sv := this.ShuffledServiceRoots(hash)
443
444         // The next server to try contacting
445         next_server := 0
446
447         // The number of active writers
448         active := 0
449
450         // Used to communicate status from the upload goroutines
451         upload_status := make(chan UploadStatus)
452         defer close(upload_status)
453
454         // Desired number of replicas
455         remaining_replicas := this.Want_replicas
456
457         for remaining_replicas > 0 {
458                 for active < remaining_replicas {
459                         // Start some upload requests
460                         if next_server < len(sv) {
461                                 go this.uploadToKeepServer(sv[next_server], hash, MakeBufferReader(requests), upload_status, expectedLength)
462                                 next_server += 1
463                                 active += 1
464                         } else {
465                                 return (this.Want_replicas - remaining_replicas), InsufficientReplicasError
466                         }
467                 }
468
469                 // Now wait for something to happen.
470                 select {
471                 case status := <-reader_status:
472                         if status == io.EOF {
473                                 // good news!
474                         } else {
475                                 // bad news
476                                 return (this.Want_replicas - remaining_replicas), status
477                         }
478                 case status := <-upload_status:
479                         if status.StatusCode == 200 {
480                                 // good news!
481                                 remaining_replicas -= 1
482                         } else {
483                                 // writing to keep server failed for some reason
484                                 log.Printf("Keep server put to %v failed with '%v'",
485                                         status.Url, status.Err)
486                         }
487                         active -= 1
488                         log.Printf("Upload status %v %v %v", status.StatusCode, remaining_replicas, active)
489                 }
490         }
491
492         return (this.Want_replicas - remaining_replicas), nil
493 }
494
// OversizeBlockError is returned by PutHR when the expected length of the
// data exceeds BLOCKSIZE.
var OversizeBlockError = errors.New("Block too big")
496
497 func (this KeepClient) PutHR(hash string, r io.Reader, expectedLength int64) (replicas int, err error) {
498
499         // Buffer for reads from 'r'
500         var buffer []byte
501         if expectedLength > 0 {
502                 if expectedLength > BLOCKSIZE {
503                         return 0, OversizeBlockError
504                 }
505                 buffer = make([]byte, expectedLength)
506         } else {
507                 buffer = make([]byte, BLOCKSIZE)
508         }
509
510         // Read requests on Transfer() buffer
511         requests := make(chan ReadRequest)
512         defer close(requests)
513
514         // Reporting reader error states
515         reader_status := make(chan error)
516         defer close(reader_status)
517
518         // Start the transfer goroutine
519         go Transfer(buffer, r, requests, reader_status)
520
521         return this.putReplicas(hash, requests, reader_status, expectedLength)
522 }
523
524 func (this KeepClient) PutHB(hash string, buffer []byte) (replicas int, err error) {
525         // Read requests on Transfer() buffer
526         requests := make(chan ReadRequest)
527         defer close(requests)
528
529         // Start the transfer goroutine
530         go Transfer(buffer, nil, requests, nil)
531
532         return this.putReplicas(hash, requests, nil, int64(len(buffer)))
533 }
534
535 func (this KeepClient) PutB(buffer []byte) (hash string, replicas int, err error) {
536         hash = fmt.Sprintf("%x", md5.Sum(buffer))
537         replicas, err = this.PutHB(hash, buffer)
538         return hash, replicas, err
539 }
540
541 func (this KeepClient) PutR(r io.Reader) (hash string, replicas int, err error) {
542         if buffer, err := ioutil.ReadAll(r); err != nil {
543                 return "", 0, err
544         } else {
545                 return this.PutB(buffer)
546         }
547 }
548
// Get fetches the block 'hash' without a permission signature; it is
// AuthorizedGet with an empty signature and timestamp.
func (this KeepClient) Get(hash string) (reader io.ReadCloser,
	contentLength int64, url string, err error) {
	return this.AuthorizedGet(hash, "", "")
}
553
554 func (this KeepClient) AuthorizedGet(hash string,
555         signature string,
556         timestamp string) (reader io.ReadCloser,
557         contentLength int64, url string, err error) {
558
559         // Calculate the ordering for asking servers
560         sv := this.ShuffledServiceRoots(hash)
561
562         for _, host := range sv {
563                 var req *http.Request
564                 var err error
565                 var url string
566                 if signature != "" {
567                         url = fmt.Sprintf("%s/%s+A%s@%s", host, hash,
568                                 signature, timestamp)
569                 } else {
570                         url = fmt.Sprintf("%s/%s", host, hash)
571                 }
572                 if req, err = http.NewRequest("GET", url, nil); err != nil {
573                         continue
574                 }
575
576                 req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.ApiToken))
577
578                 var resp *http.Response
579                 if resp, err = this.Client.Do(req); err != nil {
580                         continue
581                 }
582
583                 if resp.StatusCode == http.StatusOK {
584                         return resp.Body, resp.ContentLength, url, nil
585                 }
586         }
587
588         return nil, 0, "", BlockNotFound
589 }
590
// Ask checks for the block 'hash' without a permission signature; it is
// AuthorizedAsk with an empty signature and timestamp.
func (this KeepClient) Ask(hash string) (contentLength int64, url string, err error) {
	return this.AuthorizedAsk(hash, "", "")
}
594
595 func (this KeepClient) AuthorizedAsk(hash string, signature string,
596         timestamp string) (contentLength int64, url string, err error) {
597         // Calculate the ordering for asking servers
598         sv := this.ShuffledServiceRoots(hash)
599
600         for _, host := range sv {
601                 var req *http.Request
602                 var err error
603                 if signature != "" {
604                         url = fmt.Sprintf("%s/%s+A%s@%s", host, hash,
605                                 signature, timestamp)
606                 } else {
607                         url = fmt.Sprintf("%s/%s", host, hash)
608                 }
609
610                 if req, err = http.NewRequest("HEAD", url, nil); err != nil {
611                         continue
612                 }
613
614                 req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.ApiToken))
615
616                 var resp *http.Response
617                 if resp, err = this.Client.Do(req); err != nil {
618                         continue
619                 }
620
621                 if resp.StatusCode == http.StatusOK {
622                         return resp.ContentLength, url, nil
623                 }
624         }
625
626         return 0, "", BlockNotFound
627
628 }