From: Tom Clegg Date: Wed, 15 Sep 2021 07:21:45 +0000 (-0400) Subject: 18051: Fix premature ejection from WebDAV collection cache. X-Git-Tag: 2.3.0~63^2~3 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/38489133d0d3ba3797447272edea7f618cf352df 18051: Fix premature ejection from WebDAV collection cache. Avoid multiple concurrent cache sweeps. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go index a52af80484..16fbd0788c 100644 --- a/services/keep-web/cache.go +++ b/services/keep-web/cache.go @@ -25,9 +25,11 @@ type cache struct { metrics cacheMetrics pdhs *lru.TwoQueueCache collections *lru.TwoQueueCache - permissions *lru.TwoQueueCache sessions *lru.TwoQueueCache setupOnce sync.Once + + chPruneSessions chan struct{} + chPruneCollections chan struct{} } type cacheMetrics struct { @@ -37,7 +39,6 @@ type cacheMetrics struct { sessionEntries prometheus.Gauge collectionHits prometheus.Counter pdhHits prometheus.Counter - permissionHits prometheus.Counter sessionHits prometheus.Counter sessionMisses prometheus.Counter apiCalls prometheus.Counter @@ -65,13 +66,6 @@ func (m *cacheMetrics) setup(reg *prometheus.Registry) { Help: "Number of uuid-to-pdh cache hits.", }) reg.MustRegister(m.pdhHits) - m.permissionHits = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "arvados", - Subsystem: "keepweb_collectioncache", - Name: "permission_hits", - Help: "Number of targetID-to-permission cache hits.", - }) - reg.MustRegister(m.permissionHits) m.apiCalls = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "arvados", Subsystem: "keepweb_collectioncache", @@ -117,8 +111,9 @@ func (m *cacheMetrics) setup(reg *prometheus.Registry) { } type cachedPDH struct { - expire time.Time - pdh string + expire time.Time + refresh time.Time + pdh string } type cachedCollection struct { @@ -149,10 +144,6 @@ func (c *cache) setup() { if err != nil { panic(err) } - c.permissions, err = lru.New2Q(c.config.MaxPermissionEntries) - if err != nil { - panic(err) - } c.sessions, err = lru.New2Q(c.config.MaxSessions) if err != nil { panic(err) @@ -168,6 +159,18 @@ func (c *cache) setup() { c.updateGauges() } }() + c.chPruneCollections = make(chan struct{}, 1) + go func() { + for range c.chPruneCollections { + c.pruneCollections() + } + }() + c.chPruneSessions = make(chan struct{}, 1) + go func() { + for range c.chPruneSessions { + c.pruneSessions() + } + }() } func (c *cache) updateGauges() { @@ -192,19 +195,25 @@ func (c *cache) Update(client *arvados.Client, coll arvados.Collection, fs arvad } coll.ManifestText = m var updated arvados.Collection - defer c.pdhs.Remove(coll.UUID) err = client.RequestAndDecode(&updated, "PATCH", "arvados/v1/collections/"+coll.UUID, nil, map[string]interface{}{ "collection": map[string]string{ "manifest_text": coll.ManifestText, }, }) - if err == nil { - c.collections.Add(client.AuthToken+"\000"+updated.PortableDataHash, &cachedCollection{ - expire: time.Now().Add(time.Duration(c.config.TTL)), - collection: &updated, - }) + if err != nil { + c.pdhs.Remove(coll.UUID) + return err } - return err + c.collections.Add(client.AuthToken+"\000"+updated.PortableDataHash, &cachedCollection{ + expire: time.Now().Add(time.Duration(c.config.TTL)), + collection: &updated, + }) + c.pdhs.Add(coll.UUID, &cachedPDH{ + expire: time.Now().Add(time.Duration(c.config.TTL)), + refresh: time.Now().Add(time.Duration(c.config.UUIDTTL)), + pdh: updated.PortableDataHash, + }) + return nil } // ResetSession unloads any potentially stale state. Should be called @@ -246,7 +255,10 @@ func (c *cache) GetSession(token string) (arvados.CustomFileSystem, *cachedSessi } else { c.metrics.sessionHits.Inc() } - go c.pruneSessions() + select { + case c.chPruneSessions <- struct{}{}: + default: + } fs, _ := sess.fs.Load().(arvados.CustomFileSystem) if fs != nil && !expired { return fs, sess, nil @@ -302,19 +314,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo c.setupOnce.Do(c.setup) c.metrics.requests.Inc() - permOK := false - permKey := arv.ApiToken + "\000" + targetID - if forceReload { - } else if ent, cached := c.permissions.Get(permKey); cached { - ent := ent.(*cachedPermission) - if ent.expire.Before(time.Now()) { - c.permissions.Remove(permKey) - } else { - permOK = true - c.metrics.permissionHits.Inc() - } - } - + var pdhRefresh bool var pdh string if arvadosclient.PDHMatch(targetID) { pdh = targetID @@ -324,22 +324,26 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo c.pdhs.Remove(targetID) } else { pdh = ent.pdh + pdhRefresh = forceReload || time.Now().After(ent.refresh) c.metrics.pdhHits.Inc() } } - var collection *arvados.Collection - if pdh != "" { - collection = c.lookupCollection(arv.ApiToken + "\000" + pdh) - } - - if collection != nil && permOK { - return collection, nil - } else if collection != nil { - // Ask API for current PDH for this targetID. Most - // likely, the cached PDH is still correct; if so, - // _and_ the current token has permission, we can - // use our cached manifest. + if pdh == "" { + // UUID->PDH mapping is not cached, might as well get + // the whole collection record and be done (below). + } else if cached := c.lookupCollection(arv.ApiToken + "\000" + pdh); cached == nil { + // PDH->manifest is not cached, might as well get the + // whole collection record (below). + } else if !pdhRefresh { + // We looked up UUID->PDH very recently, and we still + // have the manifest for that PDH. + return cached, nil + } else { + // Get current PDH for this UUID (and confirm we still + // have read permission). Most likely, the cached PDH + // is still correct, in which case we can use our + // cached manifest. c.metrics.apiCalls.Inc() var current arvados.Collection err := arv.Get("collections", targetID, selectPDH, ¤t) @@ -347,47 +351,44 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo return nil, err } if current.PortableDataHash == pdh { - c.permissions.Add(permKey, &cachedPermission{ - expire: time.Now().Add(time.Duration(c.config.TTL)), - }) - if pdh != targetID { - c.pdhs.Add(targetID, &cachedPDH{ - expire: time.Now().Add(time.Duration(c.config.UUIDTTL)), - pdh: pdh, - }) - } - return collection, err + // PDH has not changed, cached manifest is + // correct. + return cached, err } - // PDH changed, but now we know we have - // permission -- and maybe we already have the - // new PDH in the cache. - if coll := c.lookupCollection(arv.ApiToken + "\000" + current.PortableDataHash); coll != nil { - return coll, nil + if cached := c.lookupCollection(arv.ApiToken + "\000" + current.PortableDataHash); cached != nil { + // PDH changed, and we already have the + // manifest for that new PDH. + return cached, nil } } - // Collection manifest is not cached. + // Either UUID->PDH is not cached, or PDH->manifest is not + // cached. + var retrieved arvados.Collection c.metrics.apiCalls.Inc() - err := arv.Get("collections", targetID, nil, &collection) + err := arv.Get("collections", targetID, nil, &retrieved) if err != nil { return nil, err } exp := time.Now().Add(time.Duration(c.config.TTL)) - c.permissions.Add(permKey, &cachedPermission{ - expire: exp, - }) - c.pdhs.Add(targetID, &cachedPDH{ - expire: time.Now().Add(time.Duration(c.config.UUIDTTL)), - pdh: collection.PortableDataHash, - }) - c.collections.Add(arv.ApiToken+"\000"+collection.PortableDataHash, &cachedCollection{ + if targetID != retrieved.PortableDataHash { + c.pdhs.Add(targetID, &cachedPDH{ + expire: exp, + refresh: time.Now().Add(time.Duration(c.config.UUIDTTL)), + pdh: retrieved.PortableDataHash, + }) + } + c.collections.Add(arv.ApiToken+"\000"+retrieved.PortableDataHash, &cachedCollection{ expire: exp, - collection: collection, + collection: &retrieved, }) - if int64(len(collection.ManifestText)) > c.config.MaxCollectionBytes/int64(c.config.MaxCollectionEntries) { - go c.pruneCollections() + if int64(len(retrieved.ManifestText)) > c.config.MaxCollectionBytes/int64(c.config.MaxCollectionEntries) { + select { + case c.chPruneCollections <- struct{}{}: + default: + } } - return collection, nil + return &retrieved, nil } // pruneCollections checks the total bytes occupied by manifest_text diff --git a/services/keep-web/cache_test.go b/services/keep-web/cache_test.go index 0a0eef6e44..80e00b02ad 100644 --- a/services/keep-web/cache_test.go +++ b/services/keep-web/cache_test.go @@ -51,7 +51,6 @@ func (s *UnitSuite) TestCache(c *check.C) { s.checkCacheMetrics(c, cache.registry, "requests 5", "hits 4", - "permission_hits 4", "pdh_hits 4", "api_calls 1") @@ -72,7 +71,6 @@ func (s *UnitSuite) TestCache(c *check.C) { s.checkCacheMetrics(c, cache.registry, "requests 6", "hits 4", - "permission_hits 4", "pdh_hits 4", "api_calls 2") @@ -85,7 +83,6 @@ func (s *UnitSuite) TestCache(c *check.C) { s.checkCacheMetrics(c, cache.registry, "requests 7", "hits 5", - "permission_hits 5", "pdh_hits 4", "api_calls 2") @@ -105,7 +102,6 @@ func (s *UnitSuite) TestCache(c *check.C) { s.checkCacheMetrics(c, cache.registry, "requests 27", "hits 23", - "permission_hits 23", "pdh_hits 22", "api_calls 4") } @@ -125,9 +121,8 @@ func (s *UnitSuite) TestCacheForceReloadByPDH(c *check.C) { s.checkCacheMetrics(c, cache.registry, "requests 4", "hits 3", - "permission_hits 1", "pdh_hits 0", - "api_calls 3") + "api_calls 1") } func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) { @@ -145,7 +140,6 @@ func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) { s.checkCacheMetrics(c, cache.registry, "requests 4", "hits 3", - "permission_hits 1", "pdh_hits 3", "api_calls 3") } diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go index e883e806cc..a74b85b98d 100644 --- a/services/keep-web/handler_test.go +++ b/services/keep-web/handler_test.go @@ -1067,7 +1067,7 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) { contentType string }{ {"picture.txt", "BMX bikes are small this year\n", "text/plain; charset=utf-8"}, - {"picture.bmp", "BMX bikes are small this year\n", "image/x-ms-bmp"}, + {"picture.bmp", "BMX bikes are small this year\n", "image/(x-ms-)?bmp"}, {"picture.jpg", "BMX bikes are small this year\n", "image/jpeg"}, {"picture1", "BMX bikes are small this year\n", "image/bmp"}, // content sniff; "BM" is the magic signature for .bmp {"picture2", "Cars are small this year\n", "text/plain; charset=utf-8"}, // content sniff @@ -1103,7 +1103,7 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) { resp := httptest.NewRecorder() s.testServer.Handler.ServeHTTP(resp, req) c.Check(resp.Code, check.Equals, http.StatusOK) - c.Check(resp.Header().Get("Content-Type"), check.Equals, trial.contentType) + c.Check(resp.Header().Get("Content-Type"), check.Matches, trial.contentType) c.Check(resp.Body.String(), check.Equals, trial.content) } } diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go index 21d7672087..d592295209 100644 --- a/services/keep-web/server_test.go +++ b/services/keep-web/server_test.go @@ -393,7 +393,6 @@ func (s *IntegrationSuite) TestMetrics(c *check.C) { c.Check(counters["arvados_keepweb_collectioncache_api_calls//"].Value, check.Equals, int64(2)) c.Check(counters["arvados_keepweb_collectioncache_hits//"].Value, check.Equals, int64(1)) c.Check(counters["arvados_keepweb_collectioncache_pdh_hits//"].Value, check.Equals, int64(1)) - c.Check(counters["arvados_keepweb_collectioncache_permission_hits//"].Value, check.Equals, int64(1)) c.Check(gauges["arvados_keepweb_collectioncache_cached_manifests//"].Value, check.Equals, float64(1)) // FooCollection's cached manifest size is 45 ("1f4b0....+45") plus one 51-byte blob signature c.Check(gauges["arvados_keepweb_sessions_cached_collection_bytes//"].Value, check.Equals, float64(45+51))