1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
16 "git.curoverse.com/arvados.git/sdk/go/arvados"
17 "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
18 "git.curoverse.com/arvados.git/sdk/go/config"
19 "git.curoverse.com/arvados.git/sdk/go/keepclient"
20 "github.com/coreos/go-systemd/daemon"
21 "github.com/prometheus/client_golang/prometheus"
26 // BlockSize is the size of a Keep "block" in bytes: 64 MiB (64 * 1024 * 1024).
27 const BlockSize = 64 * 1024 * 1024
29 // MinFreeKilobytes is the amount of free space, in kilobytes, that a Keep
30 // volume must have available in order to permit writes. It is BlockSize expressed in KiB (65536), i.e. one block.
31 const MinFreeKilobytes = BlockSize / 1024
33 // ProcMounts is the path of the mounts file, "/proc/mounts" by default. NOTE(review): presumably a var so it can be overridden (e.g. in tests) — confirm.
34 var ProcMounts = "/proc/mounts"
40 type KeepError struct {
46 BadRequestError = &KeepError{400, "Bad Request"}
47 UnauthorizedError = &KeepError{401, "Unauthorized"}
48 CollisionError = &KeepError{500, "Collision"}
49 RequestHashError = &KeepError{422, "Hash mismatch in request"}
50 PermissionError = &KeepError{403, "Forbidden"}
51 DiskHashError = &KeepError{500, "Hash mismatch in stored data"}
52 ExpiredError = &KeepError{401, "Expired permission signature"}
53 NotFoundError = &KeepError{404, "Not Found"}
54 VolumeBusyError = &KeepError{503, "Volume backend busy"}
55 GenericError = &KeepError{500, "Fail"}
56 FullError = &KeepError{503, "Full"}
57 SizeRequiredError = &KeepError{411, "Missing Content-Length"}
58 TooLongError = &KeepError{413, "Block is too large"}
59 MethodDisabledError = &KeepError{405, "Method disabled"}
60 ErrNotImplemented = &KeepError{500, "Unsupported configuration"}
61 ErrClientDisconnect = &KeepError{503, "Client disconnected"}
64 func (e *KeepError) Error() string {
68 // ========================
69 // Internal data structures
71 // These global variables are used by multiple parts of the
72 // program. They are good candidates for moving into their own
75 // KeepVM is the package-level Keep VolumeManager, which maintains a list of available volumes.
76 // Initialized by the --volumes flag (or by FindKeepVolumes); the startup code in this file assigns it from MakeRRVolumeManager(theConfig.Volumes).
77 var KeepVM VolumeManager
79 // The pull list manager and trash queue are threadsafe queues which
80 // support atomic update operations. The PullHandler and TrashHandler
81 // store results from Data Manager /pull and /trash requests here.
83 // See the Keep and Data Manager design documents for more details:
84 // https://arvados.org/projects/arvados/wiki/Keep_Design_Doc
85 // https://arvados.org/projects/arvados/wiki/Data_Manager_Design_Doc
91 deprecated.beforeFlagParse(theConfig)
93 dumpConfig := flag.Bool("dump-config", false, "write current configuration to stdout and exit (useful for migrating from command line flags to config file)")
94 getVersion := flag.Bool("version", false, "Print version information and exit.")
96 defaultConfigPath := "/etc/arvados/keepstore/keepstore.yml"
102 "YAML or JSON configuration file `path`")
106 // Print version information if requested
108 fmt.Printf("keepstore %s\n", version)
112 deprecated.afterFlagParse(theConfig)
114 err := config.LoadFile(theConfig, configPath)
115 if err != nil && (!os.IsNotExist(err) || configPath != defaultConfigPath) {
120 log.Fatal(config.DumpAndExit(theConfig))
123 log.Printf("keepstore %s started", version)
125 metricsRegistry := prometheus.NewRegistry()
127 err = theConfig.Start(metricsRegistry)
132 if pidfile := theConfig.PIDFile; pidfile != "" {
133 f, err := os.OpenFile(pidfile, os.O_RDWR|os.O_CREATE, 0777)
135 log.Fatalf("open pidfile (%s): %s", pidfile, err)
138 err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
140 log.Fatalf("flock pidfile (%s): %s", pidfile, err)
142 defer os.Remove(pidfile)
145 log.Fatalf("truncate pidfile (%s): %s", pidfile, err)
147 _, err = fmt.Fprint(f, os.Getpid())
149 log.Fatalf("write pidfile (%s): %s", pidfile, err)
153 log.Fatalf("sync pidfile (%s): %s", pidfile, err)
157 var cluster *arvados.Cluster
158 cfg, err := arvados.GetConfig(arvados.DefaultConfigFile)
159 if err != nil && os.IsNotExist(err) {
160 log.Warnf("DEPRECATED: proceeding without cluster configuration file %q (%s)", arvados.DefaultConfigFile, err)
161 cluster = &arvados.Cluster{
164 } else if err != nil {
165 log.Fatalf("load config %q: %s", arvados.DefaultConfigFile, err)
167 cluster, err = cfg.GetCluster("")
169 log.Fatalf("config error in %q: %s", arvados.DefaultConfigFile, err)
173 log.Println("keepstore starting, pid", os.Getpid())
174 defer log.Println("keepstore exiting, pid", os.Getpid())
176 // Start a round-robin VolumeManager with the volumes we have found.
177 KeepVM = MakeRRVolumeManager(theConfig.Volumes)
179 // Middleware/handler stack
180 router := MakeRESTRouter(cluster, metricsRegistry)
182 // Set up a TCP listener.
183 listener, err := net.Listen("tcp", theConfig.Listen)
188 // Initialize keepclient for pull workers
189 keepClient := &keepclient.KeepClient{
190 Arvados: &arvadosclient.ArvadosClient{},
194 // Initialize the pullq and workers
195 pullq = NewWorkQueue()
196 for i := 0; i < 1 || i < theConfig.PullWorkers; i++ {
197 go RunPullWorker(pullq, keepClient)
200 // Initialize the trashq and workers
201 trashq = NewWorkQueue()
202 for i := 0; i < 1 || i < theConfig.TrashWorkers; i++ {
203 go RunTrashWorker(trashq)
206 // Start emptyTrash goroutine
207 doneEmptyingTrash := make(chan bool)
208 go emptyTrash(doneEmptyingTrash, theConfig.TrashCheckInterval.Duration())
210 // Shut down the server gracefully (by closing the listener)
211 // if SIGTERM is received.
212 term := make(chan os.Signal, 1)
213 go func(sig <-chan os.Signal) {
215 log.Println("caught signal:", s)
216 doneEmptyingTrash <- true
219 signal.Notify(term, syscall.SIGTERM)
220 signal.Notify(term, syscall.SIGINT)
222 if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
223 log.Printf("Error notifying init daemon: %v", err)
225 log.Println("listening at", listener.Addr())
231 // Periodically (once per interval) invoke EmptyTrash on all volumes.
232 func emptyTrash(done <-chan bool, interval time.Duration) {
233 ticker := time.NewTicker(interval)
238 for _, v := range theConfig.Volumes {