1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.arvados.org/arvados.git/sdk/go/ctxlog"
16 "github.com/jmoiron/sqlx"
// Predefined lockers. Each carries a distinct key, which Lock passes to
// pg_try_advisory_lock and Unlock passes to pg_advisory_unlock.
20 TrashSweep = &DBLocker{key: 10001}
21 ContainerLogSweep = &DBLocker{key: 10002}
22 KeepBalanceService = &DBLocker{key: 10003} // keep-balance service in periodic-sweep loop
23 KeepBalanceActive = &DBLocker{key: 10004} // keep-balance sweep in progress (either -once=true or service loop)
// retryDelay is the pause between iterations of the retry loop in Lock.
24 retryDelay = 5 * time.Second
27 // DBLocker uses pg_advisory_lock to maintain a cluster-wide lock for
28 // a long-running task like "do X every N seconds".
29 type DBLocker struct {
// getdb is the pool getter passed to Lock, saved once the lock has been
// acquired so that Check can call Lock again with it.
33 getdb func(context.Context) (*sqlx.DB, error)
34 conn *sql.Conn // != nil if advisory lock has been acquired
37 // Lock acquires the advisory lock, waiting/reconnecting if needed.
39 // Returns false if ctx is canceled before the lock is acquired.
40 func (dbl *DBLocker) Lock(ctx context.Context, getdb func(context.Context) (*sqlx.DB, error)) bool {
// Tag every log entry from this call with the lock key.
41 logger := ctxlog.FromContext(ctx).WithField("ID", dbl.key)
// Retry loop: the post statement sleeps for retryDelay before each new
// attempt.
43 for ; ; time.Sleep(retryDelay) {
46 // Another goroutine is already locked/waiting
47 // on this lock. Wait for them to release.
// err == context.Canceled is tested before the general error case;
// other errors are logged at Info level.
56 if err == context.Canceled {
59 } else if err != nil {
60 logger.WithError(err).Info("error getting database pool")
// Take a single connection from the pool and run the lock queries on
// it; on success this connection is stored in dbl.conn below.
64 conn, err := db.Conn(ctx)
65 if err == context.Canceled {
68 } else if err != nil {
69 logger.WithError(err).Info("error getting database connection")
// Non-blocking attempt: the boolean result of pg_try_advisory_lock is
// scanned into locked.
74 err = conn.QueryRowContext(ctx, `SELECT pg_try_advisory_lock($1)`, dbl.key).Scan(&locked)
75 if err == context.Canceled {
77 } else if err != nil {
78 logger.WithError(err).Info("error getting pg_try_advisory_lock")
// Look up the client address and port of a session that has an
// advisory-lock entry for this key in pg_locks, so the wait message
// can name it.
86 err = conn.QueryRowContext(ctx, `SELECT client_addr, client_port FROM pg_stat_activity WHERE pid IN
87 (SELECT pid FROM pg_locks
88 WHERE locktype = $1 AND objid = $2)`, "advisory", dbl.key).Scan(&host, &port)
90 logger.WithError(err).Info("error getting other client info")
// Log the wait message only when the holder differs from lastHeldBy.
92 heldBy := net.JoinHostPort(host, fmt.Sprintf("%d", port))
93 if lastHeldBy != heldBy {
94 logger.WithField("DBClient", heldBy).Info("waiting for other process to release lock")
102 logger.Debug("acquired pg_advisory_lock")
// Save the context, pool getter and connection for later use by Check
// and Unlock.
103 dbl.ctx, dbl.getdb, dbl.conn = ctx, getdb, conn
109 // Check confirms that the lock is still active (i.e., the session is
110 // still alive), and re-acquires if needed. Panics if Lock is not
113 // Returns false if the context passed to Lock() is canceled before
114 // the lock is confirmed or reacquired.
115 func (dbl *DBLocker) Check() bool {
// Ping the connection saved by Lock, using the context saved by Lock.
117 err := dbl.conn.PingContext(dbl.ctx)
118 if err == context.Canceled {
121 } else if err == nil {
122 ctxlog.FromContext(dbl.ctx).WithField("ID", dbl.key).Debug("connection still alive")
// Ping-failure path (per the log message below): log the error, then
// try to take the lock again with the saved context and pool getter.
126 ctxlog.FromContext(dbl.ctx).WithError(err).Info("database connection ping failed")
129 ctx, getdb := dbl.ctx, dbl.getdb
131 return dbl.Lock(ctx, getdb)
// Unlock releases the advisory lock by running pg_advisory_unlock with
// this locker's key on the connection saved by Lock.
134 func (dbl *DBLocker) Unlock() {
136 defer dbl.mtx.Unlock()
// The query runs under context.Background() rather than dbl.ctx --
// presumably so it can still run after the context passed to Lock has
// been canceled (TODO confirm).
138 _, err := dbl.conn.ExecContext(context.Background(), `SELECT pg_advisory_unlock($1)`, dbl.key)
// A failure to release is logged at Info level; success at Debug.
140 ctxlog.FromContext(dbl.ctx).WithError(err).WithField("ID", dbl.key).Info("error releasing pg_advisory_lock")
142 ctxlog.FromContext(dbl.ctx).WithField("ID", dbl.key).Debug("released pg_advisory_lock")