Merge branch '14259-pysdk-remote-block-copy'
[arvados.git] / services / keepstore / proxy_remote.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "context"
9         "errors"
10         "io"
11         "net/http"
12         "regexp"
13         "strings"
14         "sync"
15         "time"
16
17         "git.curoverse.com/arvados.git/sdk/go/arvados"
18         "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
19         "git.curoverse.com/arvados.git/sdk/go/auth"
20         "git.curoverse.com/arvados.git/sdk/go/keepclient"
21 )
22
23 type remoteProxy struct {
24         clients map[string]*keepclient.KeepClient
25         mtx     sync.Mutex
26 }
27
28 func (rp *remoteProxy) Get(ctx context.Context, w http.ResponseWriter, r *http.Request, cluster *arvados.Cluster) {
29         // Intervening proxies must not return a cached GET response
30         // to a prior request if a X-Keep-Signature request header has
31         // been added or changed.
32         w.Header().Add("Vary", "X-Keep-Signature")
33
34         token := GetAPIToken(r)
35         if token == "" {
36                 http.Error(w, "no token provided in Authorization header", http.StatusUnauthorized)
37                 return
38         }
39         if strings.SplitN(r.Header.Get("X-Keep-Signature"), ",", 2)[0] == "local" {
40                 buf, err := getBufferWithContext(ctx, bufs, BlockSize)
41                 if err != nil {
42                         http.Error(w, err.Error(), http.StatusServiceUnavailable)
43                         return
44                 }
45                 defer bufs.Put(buf)
46                 rrc := &remoteResponseCacher{
47                         Locator:        r.URL.Path[1:],
48                         Token:          token,
49                         Buffer:         buf[:0],
50                         ResponseWriter: w,
51                         Context:        ctx,
52                 }
53                 defer rrc.Close()
54                 w = rrc
55         }
56         var remoteClient *keepclient.KeepClient
57         var parts []string
58         for i, part := range strings.Split(r.URL.Path[1:], "+") {
59                 switch {
60                 case i == 0:
61                         // don't try to parse hash part as hint
62                 case strings.HasPrefix(part, "A"):
63                         // drop local permission hint
64                         continue
65                 case len(part) > 7 && part[0] == 'R' && part[6] == '-':
66                         remoteID := part[1:6]
67                         remote, ok := cluster.RemoteClusters[remoteID]
68                         if !ok {
69                                 http.Error(w, "remote cluster not configured", http.StatusBadRequest)
70                                 return
71                         }
72                         kc, err := rp.remoteClient(remoteID, remote, token)
73                         if err == auth.ErrObsoleteToken {
74                                 http.Error(w, err.Error(), http.StatusBadRequest)
75                                 return
76                         } else if err != nil {
77                                 http.Error(w, err.Error(), http.StatusInternalServerError)
78                                 return
79                         }
80                         remoteClient = kc
81                         part = "A" + part[7:]
82                 }
83                 parts = append(parts, part)
84         }
85         if remoteClient == nil {
86                 http.Error(w, "bad request", http.StatusBadRequest)
87                 return
88         }
89         locator := strings.Join(parts, "+")
90         rdr, _, _, err := remoteClient.Get(locator)
91         switch err.(type) {
92         case nil:
93                 defer rdr.Close()
94                 io.Copy(w, rdr)
95         case *keepclient.ErrNotFound:
96                 http.Error(w, err.Error(), http.StatusNotFound)
97         default:
98                 http.Error(w, err.Error(), http.StatusBadGateway)
99         }
100 }
101
102 func (rp *remoteProxy) remoteClient(remoteID string, remoteCluster arvados.RemoteCluster, token string) (*keepclient.KeepClient, error) {
103         rp.mtx.Lock()
104         kc, ok := rp.clients[remoteID]
105         rp.mtx.Unlock()
106         if !ok {
107                 c := &arvados.Client{
108                         APIHost:   remoteCluster.Host,
109                         AuthToken: "xxx",
110                         Insecure:  remoteCluster.Insecure,
111                 }
112                 ac, err := arvadosclient.New(c)
113                 if err != nil {
114                         return nil, err
115                 }
116                 kc, err = keepclient.MakeKeepClient(ac)
117                 if err != nil {
118                         return nil, err
119                 }
120
121                 rp.mtx.Lock()
122                 if rp.clients == nil {
123                         rp.clients = map[string]*keepclient.KeepClient{remoteID: kc}
124                 } else {
125                         rp.clients[remoteID] = kc
126                 }
127                 rp.mtx.Unlock()
128         }
129         accopy := *kc.Arvados
130         accopy.ApiToken = token
131         kccopy := *kc
132         kccopy.Arvados = &accopy
133         token, err := auth.SaltToken(token, remoteID)
134         if err != nil {
135                 return nil, err
136         }
137         kccopy.Arvados.ApiToken = token
138         return &kccopy, nil
139 }
140
// localOrRemoteSignature matches one "+A..." or "+R..." hint in a
// locator: a plus sign, the letter A or R, and everything up to (but
// not including) the next plus sign.
var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^\+]*`)
142
// remoteResponseCacher is an http.ResponseWriter wrapper that holds
// a response back until Close is called. Response data is collected
// in the provided buffer; Close then writes/touches a copy on a
// local volume, adds a response header with a locally-signed
// locator, and finally writes the data through to the wrapped
// ResponseWriter.
type remoteResponseCacher struct {
	// Locator is the locator of the block being fetched. Its
	// first 32 characters are used as the block hash.
	Locator string
	// Token is the API token the returned locator is signed for.
	Token string
	// Buffer accumulates the response body. Write never grows it
	// beyond its capacity.
	Buffer []byte
	// Context is passed to PutBlock and checked for cancellation
	// in Close.
	Context context.Context
	// ResponseWriter is the wrapped writer that ultimately
	// receives the headers, status, and body.
	http.ResponseWriter
	// statusCode is the status recorded by WriteHeader, or 0 if
	// WriteHeader has not been called.
	statusCode int
}
155
156 func (rrc *remoteResponseCacher) Write(p []byte) (int, error) {
157         if len(rrc.Buffer)+len(p) > cap(rrc.Buffer) {
158                 return 0, errors.New("buffer full")
159         }
160         rrc.Buffer = append(rrc.Buffer, p...)
161         return len(p), nil
162 }
163
164 func (rrc *remoteResponseCacher) WriteHeader(statusCode int) {
165         rrc.statusCode = statusCode
166 }
167
168 func (rrc *remoteResponseCacher) Close() error {
169         if rrc.statusCode == 0 {
170                 rrc.statusCode = http.StatusOK
171         } else if rrc.statusCode != http.StatusOK {
172                 rrc.ResponseWriter.WriteHeader(rrc.statusCode)
173                 rrc.ResponseWriter.Write(rrc.Buffer)
174                 return nil
175         }
176         _, err := PutBlock(rrc.Context, rrc.Buffer, rrc.Locator[:32])
177         if rrc.Context.Err() != nil {
178                 // If caller hung up, log that instead of subsequent/misleading errors.
179                 http.Error(rrc.ResponseWriter, rrc.Context.Err().Error(), http.StatusGatewayTimeout)
180                 return err
181         }
182         if err == RequestHashError {
183                 http.Error(rrc.ResponseWriter, "checksum mismatch in remote response", http.StatusBadGateway)
184                 return err
185         }
186         if err, ok := err.(*KeepError); ok {
187                 http.Error(rrc.ResponseWriter, err.Error(), err.HTTPCode)
188                 return err
189         }
190         if err != nil {
191                 http.Error(rrc.ResponseWriter, err.Error(), http.StatusBadGateway)
192                 return err
193         }
194
195         unsigned := localOrRemoteSignature.ReplaceAllLiteralString(rrc.Locator, "")
196         signed := SignLocator(unsigned, rrc.Token, time.Now().Add(theConfig.BlobSignatureTTL.Duration()))
197         if signed == unsigned {
198                 err = errors.New("could not sign locator")
199                 http.Error(rrc.ResponseWriter, err.Error(), http.StatusInternalServerError)
200                 return err
201         }
202         rrc.Header().Set("X-Keep-Locator", signed)
203         rrc.ResponseWriter.WriteHeader(rrc.statusCode)
204         _, err = rrc.ResponseWriter.Write(rrc.Buffer)
205         return err
206 }