[ARVADOS] created: 6fef82432952b78f61c7be80820e88804b3a47d7
Git user
git at public.curoverse.com
Tue Jun 6 11:59:24 EDT 2017
at 6fef82432952b78f61c7be80820e88804b3a47d7 (commit)
commit 6fef82432952b78f61c7be80820e88804b3a47d7
Author: Tom Clegg <tom at curoverse.com>
Date: Tue Jun 6 11:57:34 2017 -0400
11809: Cache permission and collection lookups.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>
diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
new file mode 100644
index 0000000..e8bf399
--- /dev/null
+++ b/services/keep-web/cache.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "github.com/hashicorp/golang-lru"
+)
+
+type cache struct { // Settings (exported fields) and internal state for the collection/permission lookup cache.
+ TTL arvados.Duration // lifetime given to each cache entry when it is added
+ CollectionEntries int // size passed to lru.New2Q for the collection cache
+ CollectionBytes int64 // total manifest_text size that pruneCollections trims the collection cache down to
+ PermissionEntries int // size passed to lru.New2Q for the permission cache
+ UUIDEntries int // size passed to lru.New2Q for the targetID-to-PDH cache
+
+ stats cacheStats // counters, updated and read via sync/atomic
+ pdhs *lru.TwoQueueCache // targetID -> *cachedPDH
+ collections *lru.TwoQueueCache // portable data hash -> *cachedCollection
+ permissions *lru.TwoQueueCache // token + NUL + targetID -> *cachedPermission
+ setupOnce sync.Once // runs setup on the first call to Get
+}
+
+type cacheStats struct { // Counters returned by (*cache).Stats.
+ Requests uint64 // calls to Get
+ CollectionHits uint64 // unexpired collection entries found by lookupCollection
+ PDHHits uint64 // unexpired targetID-to-PDH entries found by Get
+ PermissionHits uint64 // unexpired permission entries found by Get
+ APICalls uint64 // arv.Get requests issued by Get
+}
+
+type cachedPDH struct { // Value type stored in cache.pdhs, keyed by targetID.
+ expire time.Time // Get discards the entry once this time has passed
+ pdh string // portable data hash last reported by the API for the targetID
+}
+
+type cachedCollection struct { // Value type stored in cache.collections, keyed by portable data hash.
+ expire time.Time // lookupCollection and pruneCollections discard the entry once this time has passed
+ collection map[string]interface{} // collection record as decoded from the API response
+}
+
+type cachedPermission struct { // Value type stored in cache.permissions; an unexpired entry lets Get skip the permission check.
+ expire time.Time // Get discards the entry once this time has passed
+}
+
+func (c *cache) setup() {
+ var err error
+ c.pdhs, err = lru.New2Q(c.UUIDEntries)
+ if err != nil {
+ panic(err)
+ }
+ c.collections, err = lru.New2Q(c.CollectionEntries)
+ if err != nil {
+ panic(err)
+ }
+ c.permissions, err = lru.New2Q(c.PermissionEntries)
+ if err != nil {
+ panic(err)
+ }
+}
+
+var selectPDH = map[string]interface{}{ // request parameters Get uses when it only needs the current portable_data_hash
+ "select": []string{"portable_data_hash"},
+}
+
+func (c *cache) Stats() cacheStats {
+ return cacheStats{
+ Requests: atomic.LoadUint64(&c.stats.Requests),
+ CollectionHits: atomic.LoadUint64(&c.stats.CollectionHits),
+ PDHHits: atomic.LoadUint64(&c.stats.PDHHits),
+ PermissionHits: atomic.LoadUint64(&c.stats.PermissionHits),
+ APICalls: atomic.LoadUint64(&c.stats.APICalls),
+ }
+}
+
+// Get returns the collection record for targetID (a collection UUID
+// or a portable data hash) as seen by the token in arv.ApiToken.
+//
+// The cached record is returned without any API call only when an
+// unexpired permission entry exists for this token/targetID pair,
+// the record for the corresponding PDH is cached, and forceReload is
+// false. Otherwise, if a record is cached, a PDH-only request is
+// made to confirm that the token can read targetID and that the
+// cached PDH is still current. The full record is fetched only when
+// no usable cached copy exists.
+//
+// The returned map is the same map held by the cache, so callers
+// must not modify it.
+func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceReload bool) (map[string]interface{}, error) {
+	c.setupOnce.Do(c.setup)
+
+	atomic.AddUint64(&c.stats.Requests, 1)
+
+	permOK := false
+	permKey := arv.ApiToken + "\000" + targetID
+	if ent, cached := c.permissions.Get(permKey); cached {
+		ent := ent.(*cachedPermission)
+		if ent.expire.Before(time.Now()) {
+			c.permissions.Remove(permKey)
+		} else {
+			permOK = true
+			atomic.AddUint64(&c.stats.PermissionHits, 1)
+		}
+	}
+
+	var pdh string
+	if arvadosclient.PDHMatch(targetID) {
+		pdh = targetID
+	} else if ent, cached := c.pdhs.Get(targetID); cached {
+		ent := ent.(*cachedPDH)
+		if ent.expire.Before(time.Now()) {
+			c.pdhs.Remove(targetID)
+		} else {
+			pdh = ent.pdh
+			atomic.AddUint64(&c.stats.PDHHits, 1)
+		}
+	}
+
+	collection := c.lookupCollection(pdh)
+
+	if collection != nil && permOK && !forceReload {
+		return collection, nil
+	}
+
+	if collection != nil {
+		// Ask API for current PDH for this targetID. Most
+		// likely, the cached PDH is still correct; if so,
+		// _and_ the current token has permission, we can
+		// use our cached manifest.
+		atomic.AddUint64(&c.stats.APICalls, 1)
+		var current map[string]interface{}
+		err := arv.Get("collections", targetID, selectPDH, &current)
+		if err != nil {
+			return nil, err
+		}
+		checkPDH, ok := current["portable_data_hash"].(string)
+		if !ok {
+			return nil, fmt.Errorf("API response for %q had no PDH", targetID)
+		}
+		if checkPDH == pdh {
+			exp := time.Now().Add(time.Duration(c.TTL))
+			c.permissions.Add(permKey, &cachedPermission{
+				expire: exp,
+			})
+			if pdh != targetID {
+				c.pdhs.Add(targetID, &cachedPDH{
+					expire: exp,
+					pdh:    pdh,
+				})
+			}
+			return collection, nil
+		}
+		// PDH changed, but now we know we have permission --
+		// and maybe we already have the new PDH in the cache.
+		if coll := c.lookupCollection(checkPDH); coll != nil {
+			return coll, nil
+		}
+	}
+
+	// Collection manifest is not cached.
+	//
+	// Decode into a fresh map. At this point collection may still
+	// refer to the cached record for a stale PDH, and decoding a
+	// JSON object into a non-nil map reuses that map, which would
+	// overwrite the cached record in place.
+	collection = nil
+	atomic.AddUint64(&c.stats.APICalls, 1)
+	err := arv.Get("collections", targetID, nil, &collection)
+	if err != nil {
+		return nil, err
+	}
+	pdh, ok := collection["portable_data_hash"].(string)
+	if !ok {
+		return nil, fmt.Errorf("API response for %q had no PDH", targetID)
+	}
+	// A missing or non-string manifest_text counts as zero bytes
+	// rather than causing a panic after the entry has been cached.
+	manifest, _ := collection["manifest_text"].(string)
+	exp := time.Now().Add(time.Duration(c.TTL))
+	c.permissions.Add(permKey, &cachedPermission{
+		expire: exp,
+	})
+	if pdh != targetID {
+		// The pdhs cache is only consulted when targetID is
+		// not itself a PDH, so a PDH->PDH entry would just
+		// take up a slot.
+		c.pdhs.Add(targetID, &cachedPDH{
+			expire: exp,
+			pdh:    pdh,
+		})
+	}
+	c.collections.Add(pdh, &cachedCollection{
+		expire:     exp,
+		collection: collection,
+	})
+	// Only manifests larger than the average per-entry budget can
+	// push the cache over CollectionBytes, so prune only then.
+	if int64(len(manifest)) > c.CollectionBytes/int64(c.CollectionEntries) {
+		c.pruneCollections()
+	}
+	return collection, nil
+}
+
+// pruneCollections checks the total bytes occupied by manifest_text
+// in the collection cache and removes old entries as needed to bring
+// the total size down to CollectionBytes. It also deletes all expired
+// entries.
+//
+// Entries are visited in the order returned by the cache's Keys
+// method. A record whose manifest_text is missing or not a string is
+// counted as zero bytes.
+//
+// pruneCollections does not aim to be perfectly correct when there is
+// concurrent cache activity.
+func (c *cache) pruneCollections() {
+	var size int64
+	now := time.Now()
+	keys := c.collections.Keys()
+	entsize := make([]int, len(keys))
+	expired := make([]bool, len(keys))
+	for i, k := range keys {
+		v, ok := c.collections.Peek(k)
+		if !ok {
+			// Entry disappeared since Keys() was called.
+			continue
+		}
+		ent := v.(*cachedCollection)
+		// Checked assertion: a malformed record must not
+		// panic the whole pruning pass.
+		manifest, _ := ent.collection["manifest_text"].(string)
+		n := len(manifest)
+		size += int64(n)
+		entsize[i] = n
+		expired[i] = ent.expire.Before(now)
+	}
+	// First pass: drop everything that has expired.
+	for i, k := range keys {
+		if expired[i] {
+			c.collections.Remove(k)
+			size -= int64(entsize[i])
+		}
+	}
+	// Second pass: keep dropping entries until the total fits.
+	for i, k := range keys {
+		if size <= c.CollectionBytes {
+			break
+		}
+		if expired[i] {
+			// already removed this entry in the previous loop
+			continue
+		}
+		c.collections.Remove(k)
+		size -= int64(entsize[i])
+	}
+}
+
+// lookupCollection returns the cached collection record for pdh, or
+// nil if pdh is empty, not cached, or expired. An expired entry is
+// removed from the cache; a successful lookup increments the
+// CollectionHits counter.
+func (c *cache) lookupCollection(pdh string) map[string]interface{} {
+	if pdh == "" {
+		return nil
+	}
+	v, found := c.collections.Get(pdh)
+	if !found {
+		return nil
+	}
+	entry := v.(*cachedCollection)
+	if entry.expire.Before(time.Now()) {
+		c.collections.Remove(pdh)
+		return nil
+	}
+	atomic.AddUint64(&c.stats.CollectionHits, 1)
+	return entry.collection
+}
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index b7e39c6..45cd27f 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -275,11 +275,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
targetPath = targetPath[1:]
}
+ forceReload := false
+ if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
+ forceReload = true
+ }
+
+ var collection map[string]interface{}
tokenResult := make(map[string]int)
- collection := make(map[string]interface{})
found := false
for _, arv.ApiToken = range tokens {
- err := arv.Get("collections", targetID, nil, &collection)
+ var err error
+ collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
if err == nil {
// Success
found = true
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 57ac219..df0346b 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -19,7 +19,7 @@ var _ = check.Suite(&UnitSuite{})
type UnitSuite struct{}
func (s *UnitSuite) TestCORSPreflight(c *check.C) {
- h := handler{Config: &Config{}}
+ h := handler{Config: DefaultConfig()}
u, _ := url.Parse("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
req := &http.Request{
Method: "OPTIONS",
@@ -70,9 +70,9 @@ func (s *UnitSuite) TestInvalidUUID(c *check.C) {
RequestURI: u.RequestURI(),
}
resp := httptest.NewRecorder()
- h := handler{Config: &Config{
- AnonymousTokens: []string{arvadostest.AnonymousToken},
- }}
+ cfg := DefaultConfig()
+ cfg.AnonymousTokens = []string{arvadostest.AnonymousToken}
+ h := handler{Config: cfg}
h.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusNotFound)
}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index 5f4cb50..bcdcd8b 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -4,6 +4,7 @@ import (
"flag"
"log"
"os"
+ "time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/config"
@@ -24,6 +25,8 @@ type Config struct {
AttachmentOnlyHost string
TrustAllContent bool
+ Cache cache
+
// Hack to support old command line flag, which is a bool
// meaning "get actual token from environment".
deprecatedAllowAnonymous bool
@@ -33,6 +36,13 @@ type Config struct {
func DefaultConfig() *Config { // DefaultConfig returns a Config with the default listen address and cache settings.
return &Config{
Listen: ":80",
+ Cache: cache{
+ TTL: arvados.Duration(5 * time.Minute), // cache entries expire five minutes after being added
+ CollectionEntries: 100,
+ CollectionBytes: 100000000, // 100,000,000 bytes of manifest_text
+ PermissionEntries: 100,
+ UUIDEntries: 100,
+ },
}
}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 6441364..52fe459 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -311,13 +311,13 @@ func (s *IntegrationSuite) TearDownSuite(c *check.C) {
func (s *IntegrationSuite) SetUpTest(c *check.C) { // SetUpTest resets the test environment and starts a fresh test server.
arvadostest.ResetEnv()
- s.testServer = &server{Config: &Config{
- Client: arvados.Client{
- APIHost: testAPIHost,
- Insecure: true,
- },
- Listen: "127.0.0.1:0",
- }}
+ cfg := DefaultConfig() // start from DefaultConfig, which supplies the Cache settings
+ cfg.Client = arvados.Client{
+ APIHost: testAPIHost,
+ Insecure: true,
+ }
+ cfg.Listen = "127.0.0.1:0"
+ s.testServer = &server{Config: cfg}
err := s.testServer.Start()
c.Assert(err, check.Equals, nil)
}
diff --git a/services/keep-web/usage.go b/services/keep-web/usage.go
index a36bf58..b854f53 100644
--- a/services/keep-web/usage.go
+++ b/services/keep-web/usage.go
@@ -67,5 +67,25 @@ TrustAllContent:
Serve non-public content from a single origin. Dangerous: read
docs before using!
+Cache.TTL:
+
+ Maximum time to cache collection data and permission checks.
+
+Cache.CollectionEntries:
+
+ Maximum number of collection cache entries.
+
+Cache.CollectionBytes:
+
+ Approximate memory limit for collection cache.
+
+Cache.PermissionEntries:
+
+ Maximum number of permission cache entries.
+
+Cache.UUIDEntries:
+
+ Maximum number of UUID cache entries.
+
`, exampleConfigFile)
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list