[ARVADOS] updated: 29cb16bdd726b09f8cded0d245ed6a72c62eaf8b

Git user git at public.curoverse.com
Wed Jun 7 11:39:50 EDT 2017


Summary of changes:
 services/keep-web/cache.go        | 259 ++++++++++++++++++++++++++++++++++++++
 services/keep-web/cache_test.go   | 104 +++++++++++++++
 services/keep-web/handler.go      |  23 +++-
 services/keep-web/handler_test.go |   8 +-
 services/keep-web/main.go         |  10 ++
 services/keep-web/server_test.go  |  14 +--
 services/keep-web/status_test.go  |  46 +++++++
 services/keep-web/usage.go        |  20 +++
 8 files changed, 471 insertions(+), 13 deletions(-)
 create mode 100644 services/keep-web/cache.go
 create mode 100644 services/keep-web/cache_test.go
 create mode 100644 services/keep-web/status_test.go

       via  29cb16bdd726b09f8cded0d245ed6a72c62eaf8b (commit)
       via  7e0409e3b5e2fd80f7a74feb719dbc41c2193ef6 (commit)
       via  72e22b49ec2721d3a1369da768d3d74fa9c079c3 (commit)
       via  9d894536b8a7044fdfa168f81a38c3408e6cd7b4 (commit)
       via  d22400822b3489c621e5dbd902749b1a547ca579 (commit)
       via  6246cacc17e8b90519143e717b7241e527678be9 (commit)
       via  eb21778470992713458dc42e4a115bd0a619c5de (commit)
       via  a15a137c593d24649e2960471d7273acad695186 (commit)
       via  46580c9d21578ec9f0638c9cb464703c7ede00b3 (commit)
       via  0940739d41d71154937354696634a2989a9db9fc (commit)
       via  6fef82432952b78f61c7be80820e88804b3a47d7 (commit)
      from  f7b0474852fa8f270605c4cb5eeaf85c910c421e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 29cb16bdd726b09f8cded0d245ed6a72c62eaf8b
Merge: f7b0474 7e0409e
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 11:39:22 2017 -0400

    Merge branch '11809-keep-web-cache'
    
    closes #11809


commit 7e0409e3b5e2fd80f7a74feb719dbc41c2193ef6
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 11:28:22 2017 -0400

    11809: Add tests for /status.json.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/status_test.go b/services/keep-web/status_test.go
new file mode 100644
index 0000000..e40c1d0
--- /dev/null
+++ b/services/keep-web/status_test.go
@@ -0,0 +1,46 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"gopkg.in/check.v1"
+)
+
+func (s *UnitSuite) TestStatus(c *check.C) {
+	h := handler{Config: DefaultConfig()}
+	u, _ := url.Parse("http://keep-web.example/status.json")
+	req := &http.Request{
+		Method:     "GET",
+		Host:       u.Host,
+		URL:        u,
+		RequestURI: u.RequestURI(),
+	}
+	resp := httptest.NewRecorder()
+	h.ServeHTTP(resp, req)
+	c.Check(resp.Code, check.Equals, http.StatusOK)
+
+	var status map[string]interface{}
+	err := json.NewDecoder(resp.Body).Decode(&status)
+	c.Check(err, check.IsNil)
+	c.Check(status["Cache.Requests"], check.Equals, float64(0))
+}
+
+func (s *IntegrationSuite) TestNoStatusFromVHost(c *check.C) {
+	u, _ := url.Parse("http://" + arvadostest.FooCollection + "--keep-web.example/status.json")
+	req := &http.Request{
+		Method:     "GET",
+		Host:       u.Host,
+		URL:        u,
+		RequestURI: u.RequestURI(),
+		Header: http.Header{
+			"Authorization": {"OAuth2 " + arvadostest.ActiveToken},
+		},
+	}
+	resp := httptest.NewRecorder()
+	s.testServer.Handler.ServeHTTP(resp, req)
+	c.Check(resp.Code, check.Equals, http.StatusNotFound)
+}

commit 72e22b49ec2721d3a1369da768d3d74fa9c079c3
Merge: 9d89453 f7b0474
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 10:51:15 2017 -0400

    11809: Merge branch 'master' into 11809-keep-web-cache


commit 9d894536b8a7044fdfa168f81a38c3408e6cd7b4
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 10:47:37 2017 -0400

    11809: Skip lookups in forceReload case. Add forceReload tests.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index 7970165..ab7c653 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -89,7 +89,8 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
 
 	permOK := false
 	permKey := arv.ApiToken + "\000" + targetID
-	if ent, cached := c.permissions.Get(permKey); cached {
+	if forceReload {
+	} else if ent, cached := c.permissions.Get(permKey); cached {
 		ent := ent.(*cachedPermission)
 		if ent.expire.Before(time.Now()) {
 			c.permissions.Remove(permKey)
@@ -102,6 +103,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
 	var pdh string
 	if arvadosclient.PDHMatch(targetID) {
 		pdh = targetID
+	} else if forceReload {
 	} else if ent, cached := c.pdhs.Get(targetID); cached {
 		ent := ent.(*cachedPDH)
 		if ent.expire.Before(time.Now()) {
@@ -112,13 +114,14 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
 		}
 	}
 
-	collection := c.lookupCollection(pdh)
-
-	if collection != nil && permOK && !forceReload {
-		return collection, nil
+	var collection map[string]interface{}
+	if pdh != "" {
+		collection = c.lookupCollection(pdh)
 	}
 
-	if collection != nil {
+	if collection != nil && permOK {
+		return collection, nil
+	} else if collection != nil {
 		// Ask API for current PDH for this targetID. Most
 		// likely, the cached PDH is still correct; if so,
 		// _and_ the current token has permission, we can
diff --git a/services/keep-web/cache_test.go b/services/keep-web/cache_test.go
index 9538543..f8aa2b1 100644
--- a/services/keep-web/cache_test.go
+++ b/services/keep-web/cache_test.go
@@ -66,3 +66,39 @@ func (s *UnitSuite) TestCache(c *check.C) {
 	c.Check(cache.Stats().PDHHits, check.Equals, uint64(4+0+18))
 	c.Check(cache.Stats().APICalls, check.Equals, uint64(1+1+2))
 }
+
+func (s *UnitSuite) TestCacheForceReloadByPDH(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+
+	cache := DefaultConfig().Cache
+
+	for _, forceReload := range []bool{false, true, false, true} {
+		_, err := cache.Get(arv, arvadostest.FooPdh, forceReload)
+		c.Check(err, check.Equals, nil)
+	}
+
+	c.Check(cache.Stats().Requests, check.Equals, uint64(4))
+	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(3))
+	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(1))
+	c.Check(cache.Stats().PDHHits, check.Equals, uint64(0))
+	c.Check(cache.Stats().APICalls, check.Equals, uint64(3))
+}
+
+func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+
+	cache := DefaultConfig().Cache
+
+	for _, forceReload := range []bool{false, true, false, true} {
+		_, err := cache.Get(arv, arvadostest.FooCollection, forceReload)
+		c.Check(err, check.Equals, nil)
+	}
+
+	c.Check(cache.Stats().Requests, check.Equals, uint64(4))
+	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(1))
+	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(1))
+	c.Check(cache.Stats().PDHHits, check.Equals, uint64(1))
+	c.Check(cache.Stats().APICalls, check.Equals, uint64(3))
+}

commit d22400822b3489c621e5dbd902749b1a547ca579
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 10:32:09 2017 -0400

    11809: Increase default cache sizes.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index 0cb62d1..f17522c 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -38,10 +38,10 @@ func DefaultConfig() *Config {
 		Listen: ":80",
 		Cache: cache{
 			TTL:                  arvados.Duration(5 * time.Minute),
-			MaxCollectionEntries: 100,
+			MaxCollectionEntries: 1000,
 			MaxCollectionBytes:   100000000,
-			MaxPermissionEntries: 100,
-			MaxUUIDEntries:       100,
+			MaxPermissionEntries: 1000,
+			MaxUUIDEntries:       1000,
 		},
 	}
 }

commit 6246cacc17e8b90519143e717b7241e527678be9
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 10:27:59 2017 -0400

    11809: Prune collection cache asynchronously.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index b9e3e3e..7970165 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -176,7 +176,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
 		collection: collection,
 	})
 	if int64(len(collection["manifest_text"].(string))) > c.MaxCollectionBytes/int64(c.MaxCollectionEntries) {
-		c.pruneCollections()
+		go c.pruneCollections()
 	}
 	return collection, nil
 }

commit eb21778470992713458dc42e4a115bd0a619c5de
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 10:27:06 2017 -0400

    11809: Rename FooEntries -> MaxFooEntries in cache config.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index 26f6627..b9e3e3e 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -12,11 +12,11 @@ import (
 )
 
 type cache struct {
-	TTL               arvados.Duration
-	CollectionEntries int
-	CollectionBytes   int64
-	PermissionEntries int
-	UUIDEntries       int
+	TTL                  arvados.Duration
+	MaxCollectionEntries int
+	MaxCollectionBytes   int64
+	MaxPermissionEntries int
+	MaxUUIDEntries       int
 
 	stats       cacheStats
 	pdhs        *lru.TwoQueueCache
@@ -51,15 +51,15 @@ type cachedPermission struct {
 
 func (c *cache) setup() {
 	var err error
-	c.pdhs, err = lru.New2Q(c.UUIDEntries)
+	c.pdhs, err = lru.New2Q(c.MaxUUIDEntries)
 	if err != nil {
 		panic(err)
 	}
-	c.collections, err = lru.New2Q(c.CollectionEntries)
+	c.collections, err = lru.New2Q(c.MaxCollectionEntries)
 	if err != nil {
 		panic(err)
 	}
-	c.permissions, err = lru.New2Q(c.PermissionEntries)
+	c.permissions, err = lru.New2Q(c.MaxPermissionEntries)
 	if err != nil {
 		panic(err)
 	}
@@ -175,7 +175,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
 		expire:     exp,
 		collection: collection,
 	})
-	if int64(len(collection["manifest_text"].(string))) > c.CollectionBytes/int64(c.CollectionEntries) {
+	if int64(len(collection["manifest_text"].(string))) > c.MaxCollectionBytes/int64(c.MaxCollectionEntries) {
 		c.pruneCollections()
 	}
 	return collection, nil
@@ -212,7 +212,7 @@ func (c *cache) pruneCollections() {
 		}
 	}
 	for i, k := range keys {
-		if size <= c.CollectionBytes {
+		if size <= c.MaxCollectionBytes {
 			break
 		}
 		if expired[i] {
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index bcdcd8b..0cb62d1 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -37,11 +37,11 @@ func DefaultConfig() *Config {
 	return &Config{
 		Listen: ":80",
 		Cache: cache{
-			TTL:               arvados.Duration(5 * time.Minute),
-			CollectionEntries: 100,
-			CollectionBytes:   100000000,
-			PermissionEntries: 100,
-			UUIDEntries:       100,
+			TTL:                  arvados.Duration(5 * time.Minute),
+			MaxCollectionEntries: 100,
+			MaxCollectionBytes:   100000000,
+			MaxPermissionEntries: 100,
+			MaxUUIDEntries:       100,
 		},
 	}
 }
diff --git a/services/keep-web/usage.go b/services/keep-web/usage.go
index b854f53..603dd4d 100644
--- a/services/keep-web/usage.go
+++ b/services/keep-web/usage.go
@@ -71,19 +71,19 @@ Cache.TTL:
 
     Maximum time to cache collection data and permission checks.
 
-Cache.CollectionEntries:
+Cache.MaxCollectionEntries:
 
     Maximum number of collection cache entries.
 
-Cache.CollectionBytes:
+Cache.MaxCollectionBytes:
 
     Approximate memory limit for collection cache.
 
-Cache.PermissionEntries:
+Cache.MaxPermissionEntries:
 
     Maximum number of permission cache entries.
 
-Cache.UUIDEntries:
+Cache.MaxUUIDEntries:
 
     Maximum number of UUID cache entries.
 

commit a15a137c593d24649e2960471d7273acad695186
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 7 10:23:41 2017 -0400

    11809: Add /status.json handler.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index e8bf399..26f6627 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -26,11 +26,13 @@ type cache struct {
 }
 
 type cacheStats struct {
-	Requests       uint64
-	CollectionHits uint64
-	PDHHits        uint64
-	PermissionHits uint64
-	APICalls       uint64
+	Requests          uint64 `json:"Cache.Requests"`
+	CollectionBytes   uint64 `json:"Cache.CollectionBytes"`
+	CollectionEntries int    `json:"Cache.CollectionEntries"`
+	CollectionHits    uint64 `json:"Cache.CollectionHits"`
+	PDHHits           uint64 `json:"Cache.UUIDHits"`
+	PermissionHits    uint64 `json:"Cache.PermissionHits"`
+	APICalls          uint64 `json:"Cache.APICalls"`
 }
 
 type cachedPDH struct {
@@ -68,12 +70,15 @@ var selectPDH = map[string]interface{}{
 }
 
 func (c *cache) Stats() cacheStats {
+	c.setupOnce.Do(c.setup)
 	return cacheStats{
-		Requests:       atomic.LoadUint64(&c.stats.Requests),
-		CollectionHits: atomic.LoadUint64(&c.stats.CollectionHits),
-		PDHHits:        atomic.LoadUint64(&c.stats.PDHHits),
-		PermissionHits: atomic.LoadUint64(&c.stats.PermissionHits),
-		APICalls:       atomic.LoadUint64(&c.stats.APICalls),
+		Requests:          atomic.LoadUint64(&c.stats.Requests),
+		CollectionBytes:   c.collectionBytes(),
+		CollectionEntries: c.collections.Len(),
+		CollectionHits:    atomic.LoadUint64(&c.stats.CollectionHits),
+		PDHHits:           atomic.LoadUint64(&c.stats.PDHHits),
+		PermissionHits:    atomic.LoadUint64(&c.stats.PermissionHits),
+		APICalls:          atomic.LoadUint64(&c.stats.APICalls),
 	}
 }
 
@@ -219,6 +224,20 @@ func (c *cache) pruneCollections() {
 	}
 }
 
+// collectionBytes returns the approximate memory size of the
+// collection cache.
+func (c *cache) collectionBytes() uint64 {
+	var size uint64
+	for _, k := range c.collections.Keys() {
+		v, ok := c.collections.Peek(k)
+		if !ok {
+			continue
+		}
+		size += uint64(len(v.(*cachedCollection).collection["manifest_text"].(string)))
+	}
+	return size
+}
+
 func (c *cache) lookupCollection(pdh string) map[string]interface{} {
 	if pdh == "" {
 		return nil
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 45cd27f..42c37b8 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"encoding/json"
 	"fmt"
 	"html"
 	"io"
@@ -67,6 +68,15 @@ func (h *handler) setup() {
 	keepclient.RefreshServiceDiscoveryOnSIGHUP()
 }
 
+func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
+	status := struct {
+		cacheStats
+	}{
+		cacheStats: h.Config.Cache.Stats(),
+	}
+	json.NewEncoder(w).Encode(status)
+}
+
 // ServeHTTP implements http.Handler.
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	h.setupOnce.Do(h.setup)
@@ -151,6 +161,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		// http://ID.collections.example/PATH...
 		credentialsOK = true
 		targetPath = pathParts
+	} else if r.URL.Path == "/status.json" {
+		h.serveStatus(w, r)
+		return
 	} else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
 		// /c=ID/PATH...
 		targetID = parseCollectionIDFromURL(pathParts[0][2:])

commit 46580c9d21578ec9f0638c9cb464703c7ede00b3
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 6 15:21:28 2017 -0400

    11809: More cache tests.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache_test.go b/services/keep-web/cache_test.go
index ef3a760..9538543 100644
--- a/services/keep-web/cache_test.go
+++ b/services/keep-web/cache_test.go
@@ -41,9 +41,28 @@ func (s *UnitSuite) TestCache(c *check.C) {
 		c.Check(coll["portable_data_hash"], check.Equals, arvadostest.FooPdh)
 		c.Check(coll["manifest_text"].(string)[:2], check.Equals, ". ")
 	}
-	c.Check(cache.Stats().Requests, check.Equals, uint64(7))
-	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(6))
-	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(5))
-	c.Check(cache.Stats().PDHHits, check.Equals, uint64(4))
-	c.Check(cache.Stats().APICalls, check.Equals, uint64(2))
+	c.Check(cache.Stats().Requests, check.Equals, uint64(5+2))
+	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(4+2))
+	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(4+1))
+	c.Check(cache.Stats().PDHHits, check.Equals, uint64(4+0))
+	c.Check(cache.Stats().APICalls, check.Equals, uint64(1+1))
+
+	// Alternating between two collections N times should produce
+	// only 2 more API calls.
+	arv.ApiToken = arvadostest.AdminToken
+	for i := 0; i < 20; i++ {
+		var target string
+		if i%2 == 0 {
+			target = arvadostest.HelloWorldCollection
+		} else {
+			target = arvadostest.FooBarDirCollection
+		}
+		_, err := cache.Get(arv, target, false)
+		c.Check(err, check.Equals, nil)
+	}
+	c.Check(cache.Stats().Requests, check.Equals, uint64(5+2+20))
+	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(4+2+18))
+	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(4+1+18))
+	c.Check(cache.Stats().PDHHits, check.Equals, uint64(4+0+18))
+	c.Check(cache.Stats().APICalls, check.Equals, uint64(1+1+2))
 }

commit 0940739d41d71154937354696634a2989a9db9fc
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 6 15:12:53 2017 -0400

    11809: Add unit test for cache.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache_test.go b/services/keep-web/cache_test.go
new file mode 100644
index 0000000..ef3a760
--- /dev/null
+++ b/services/keep-web/cache_test.go
@@ -0,0 +1,49 @@
+package main
+
+import (
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"gopkg.in/check.v1"
+)
+
+func (s *UnitSuite) TestCache(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+
+	cache := DefaultConfig().Cache
+
+	// Hit the same collection 5 times using the same token. Only
+	// the first req should cause an API call; the next 4 should
+	// hit all caches.
+	arv.ApiToken = arvadostest.AdminToken
+	for i := 0; i < 5; i++ {
+		coll, err := cache.Get(arv, arvadostest.FooCollection, false)
+		c.Check(err, check.Equals, nil)
+		c.Assert(coll, check.NotNil)
+		c.Check(coll["portable_data_hash"], check.Equals, arvadostest.FooPdh)
+		c.Check(coll["manifest_text"].(string)[:2], check.Equals, ". ")
+	}
+	c.Check(cache.Stats().Requests, check.Equals, uint64(5))
+	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(4))
+	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(4))
+	c.Check(cache.Stats().PDHHits, check.Equals, uint64(4))
+	c.Check(cache.Stats().APICalls, check.Equals, uint64(1))
+
+	// Hit the same collection 2 more times, this time requesting
+	// it by PDH and using a different token. The first req should
+	// miss the permission cache. Both reqs should hit the
+	// Collection cache and skip the API lookup.
+	arv.ApiToken = arvadostest.ActiveToken
+	for i := 0; i < 2; i++ {
+		coll, err := cache.Get(arv, arvadostest.FooPdh, false)
+		c.Check(err, check.Equals, nil)
+		c.Assert(coll, check.NotNil)
+		c.Check(coll["portable_data_hash"], check.Equals, arvadostest.FooPdh)
+		c.Check(coll["manifest_text"].(string)[:2], check.Equals, ". ")
+	}
+	c.Check(cache.Stats().Requests, check.Equals, uint64(7))
+	c.Check(cache.Stats().CollectionHits, check.Equals, uint64(6))
+	c.Check(cache.Stats().PermissionHits, check.Equals, uint64(5))
+	c.Check(cache.Stats().PDHHits, check.Equals, uint64(4))
+	c.Check(cache.Stats().APICalls, check.Equals, uint64(2))
+}

commit 6fef82432952b78f61c7be80820e88804b3a47d7
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 6 11:57:34 2017 -0400

    11809: Cache permission and collection lookups.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curoverse.com>

diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
new file mode 100644
index 0000000..e8bf399
--- /dev/null
+++ b/services/keep-web/cache.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"github.com/hashicorp/golang-lru"
+)
+
+type cache struct {
+	TTL               arvados.Duration
+	CollectionEntries int
+	CollectionBytes   int64
+	PermissionEntries int
+	UUIDEntries       int
+
+	stats       cacheStats
+	pdhs        *lru.TwoQueueCache
+	collections *lru.TwoQueueCache
+	permissions *lru.TwoQueueCache
+	setupOnce   sync.Once
+}
+
+type cacheStats struct {
+	Requests       uint64
+	CollectionHits uint64
+	PDHHits        uint64
+	PermissionHits uint64
+	APICalls       uint64
+}
+
+type cachedPDH struct {
+	expire time.Time
+	pdh    string
+}
+
+type cachedCollection struct {
+	expire     time.Time
+	collection map[string]interface{}
+}
+
+type cachedPermission struct {
+	expire time.Time
+}
+
+func (c *cache) setup() {
+	var err error
+	c.pdhs, err = lru.New2Q(c.UUIDEntries)
+	if err != nil {
+		panic(err)
+	}
+	c.collections, err = lru.New2Q(c.CollectionEntries)
+	if err != nil {
+		panic(err)
+	}
+	c.permissions, err = lru.New2Q(c.PermissionEntries)
+	if err != nil {
+		panic(err)
+	}
+}
+
+var selectPDH = map[string]interface{}{
+	"select": []string{"portable_data_hash"},
+}
+
+func (c *cache) Stats() cacheStats {
+	return cacheStats{
+		Requests:       atomic.LoadUint64(&c.stats.Requests),
+		CollectionHits: atomic.LoadUint64(&c.stats.CollectionHits),
+		PDHHits:        atomic.LoadUint64(&c.stats.PDHHits),
+		PermissionHits: atomic.LoadUint64(&c.stats.PermissionHits),
+		APICalls:       atomic.LoadUint64(&c.stats.APICalls),
+	}
+}
+
+func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceReload bool) (map[string]interface{}, error) {
+	c.setupOnce.Do(c.setup)
+
+	atomic.AddUint64(&c.stats.Requests, 1)
+
+	permOK := false
+	permKey := arv.ApiToken + "\000" + targetID
+	if ent, cached := c.permissions.Get(permKey); cached {
+		ent := ent.(*cachedPermission)
+		if ent.expire.Before(time.Now()) {
+			c.permissions.Remove(permKey)
+		} else {
+			permOK = true
+			atomic.AddUint64(&c.stats.PermissionHits, 1)
+		}
+	}
+
+	var pdh string
+	if arvadosclient.PDHMatch(targetID) {
+		pdh = targetID
+	} else if ent, cached := c.pdhs.Get(targetID); cached {
+		ent := ent.(*cachedPDH)
+		if ent.expire.Before(time.Now()) {
+			c.pdhs.Remove(targetID)
+		} else {
+			pdh = ent.pdh
+			atomic.AddUint64(&c.stats.PDHHits, 1)
+		}
+	}
+
+	collection := c.lookupCollection(pdh)
+
+	if collection != nil && permOK && !forceReload {
+		return collection, nil
+	}
+
+	if collection != nil {
+		// Ask API for current PDH for this targetID. Most
+		// likely, the cached PDH is still correct; if so,
+		// _and_ the current token has permission, we can
+		// use our cached manifest.
+		atomic.AddUint64(&c.stats.APICalls, 1)
+		var current map[string]interface{}
+		err := arv.Get("collections", targetID, selectPDH, &current)
+		if err != nil {
+			return nil, err
+		}
+		if checkPDH, ok := current["portable_data_hash"].(string); !ok {
+			return nil, fmt.Errorf("API response for %q had no PDH", targetID)
+		} else if checkPDH == pdh {
+			exp := time.Now().Add(time.Duration(c.TTL))
+			c.permissions.Add(permKey, &cachedPermission{
+				expire: exp,
+			})
+			if pdh != targetID {
+				c.pdhs.Add(targetID, &cachedPDH{
+					expire: exp,
+					pdh:    pdh,
+				})
+			}
+			return collection, err
+		} else {
+			// PDH changed, but now we know we have
+			// permission -- and maybe we already have the
+			// new PDH in the cache.
+			if coll := c.lookupCollection(checkPDH); coll != nil {
+				return coll, nil
+			}
+		}
+	}
+
+	// Collection manifest is not cached.
+	atomic.AddUint64(&c.stats.APICalls, 1)
+	err := arv.Get("collections", targetID, nil, &collection)
+	if err != nil {
+		return nil, err
+	}
+	pdh, ok := collection["portable_data_hash"].(string)
+	if !ok {
+		return nil, fmt.Errorf("API response for %q had no PDH", targetID)
+	}
+	exp := time.Now().Add(time.Duration(c.TTL))
+	c.permissions.Add(permKey, &cachedPermission{
+		expire: exp,
+	})
+	c.pdhs.Add(targetID, &cachedPDH{
+		expire: exp,
+		pdh:    pdh,
+	})
+	c.collections.Add(pdh, &cachedCollection{
+		expire:     exp,
+		collection: collection,
+	})
+	if int64(len(collection["manifest_text"].(string))) > c.CollectionBytes/int64(c.CollectionEntries) {
+		c.pruneCollections()
+	}
+	return collection, nil
+}
+
+// pruneCollections checks the total bytes occupied by manifest_text
+// in the collection cache and removes old entries as needed to bring
+// the total size down to CollectionBytes. It also deletes all expired
+// entries.
+//
+// pruneCollections does not aim to be perfectly correct when there is
+// concurrent cache activity.
+func (c *cache) pruneCollections() {
+	var size int64
+	now := time.Now()
+	keys := c.collections.Keys()
+	entsize := make([]int, len(keys))
+	expired := make([]bool, len(keys))
+	for i, k := range keys {
+		v, ok := c.collections.Peek(k)
+		if !ok {
+			continue
+		}
+		ent := v.(*cachedCollection)
+		n := len(ent.collection["manifest_text"].(string))
+		size += int64(n)
+		entsize[i] = n
+		expired[i] = ent.expire.Before(now)
+	}
+	for i, k := range keys {
+		if expired[i] {
+			c.collections.Remove(k)
+			size -= int64(entsize[i])
+		}
+	}
+	for i, k := range keys {
+		if size <= c.CollectionBytes {
+			break
+		}
+		if expired[i] {
+			// already removed this entry in the previous loop
+			continue
+		}
+		c.collections.Remove(k)
+		size -= int64(entsize[i])
+	}
+}
+
+func (c *cache) lookupCollection(pdh string) map[string]interface{} {
+	if pdh == "" {
+		return nil
+	} else if ent, cached := c.collections.Get(pdh); !cached {
+		return nil
+	} else {
+		ent := ent.(*cachedCollection)
+		if ent.expire.Before(time.Now()) {
+			c.collections.Remove(pdh)
+			return nil
+		} else {
+			atomic.AddUint64(&c.stats.CollectionHits, 1)
+			return ent.collection
+		}
+	}
+}
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index b7e39c6..45cd27f 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -275,11 +275,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		targetPath = targetPath[1:]
 	}
 
+	forceReload := false
+	if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
+		forceReload = true
+	}
+
+	var collection map[string]interface{}
 	tokenResult := make(map[string]int)
-	collection := make(map[string]interface{})
 	found := false
 	for _, arv.ApiToken = range tokens {
-		err := arv.Get("collections", targetID, nil, &collection)
+		var err error
+		collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
 		if err == nil {
 			// Success
 			found = true
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 57ac219..df0346b 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -19,7 +19,7 @@ var _ = check.Suite(&UnitSuite{})
 type UnitSuite struct{}
 
 func (s *UnitSuite) TestCORSPreflight(c *check.C) {
-	h := handler{Config: &Config{}}
+	h := handler{Config: DefaultConfig()}
 	u, _ := url.Parse("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
 	req := &http.Request{
 		Method:     "OPTIONS",
@@ -70,9 +70,9 @@ func (s *UnitSuite) TestInvalidUUID(c *check.C) {
 			RequestURI: u.RequestURI(),
 		}
 		resp := httptest.NewRecorder()
-		h := handler{Config: &Config{
-			AnonymousTokens: []string{arvadostest.AnonymousToken},
-		}}
+		cfg := DefaultConfig()
+		cfg.AnonymousTokens = []string{arvadostest.AnonymousToken}
+		h := handler{Config: cfg}
 		h.ServeHTTP(resp, req)
 		c.Check(resp.Code, check.Equals, http.StatusNotFound)
 	}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index 5f4cb50..bcdcd8b 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"log"
 	"os"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/config"
@@ -24,6 +25,8 @@ type Config struct {
 	AttachmentOnlyHost string
 	TrustAllContent    bool
 
+	Cache cache
+
 	// Hack to support old command line flag, which is a bool
 	// meaning "get actual token from environment".
 	deprecatedAllowAnonymous bool
@@ -33,6 +36,13 @@ type Config struct {
 func DefaultConfig() *Config {
 	return &Config{
 		Listen: ":80",
+		Cache: cache{
+			TTL:               arvados.Duration(5 * time.Minute),
+			CollectionEntries: 100,
+			CollectionBytes:   100000000,
+			PermissionEntries: 100,
+			UUIDEntries:       100,
+		},
 	}
 }
 
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 6441364..52fe459 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -311,13 +311,13 @@ func (s *IntegrationSuite) TearDownSuite(c *check.C) {
 
 func (s *IntegrationSuite) SetUpTest(c *check.C) {
 	arvadostest.ResetEnv()
-	s.testServer = &server{Config: &Config{
-		Client: arvados.Client{
-			APIHost:  testAPIHost,
-			Insecure: true,
-		},
-		Listen: "127.0.0.1:0",
-	}}
+	cfg := DefaultConfig()
+	cfg.Client = arvados.Client{
+		APIHost:  testAPIHost,
+		Insecure: true,
+	}
+	cfg.Listen = "127.0.0.1:0"
+	s.testServer = &server{Config: cfg}
 	err := s.testServer.Start()
 	c.Assert(err, check.Equals, nil)
 }
diff --git a/services/keep-web/usage.go b/services/keep-web/usage.go
index a36bf58..b854f53 100644
--- a/services/keep-web/usage.go
+++ b/services/keep-web/usage.go
@@ -67,5 +67,25 @@ TrustAllContent:
     Serve non-public content from a single origin. Dangerous: read
     docs before using!
 
+Cache.TTL:
+
+    Maximum time to cache collection data and permission checks.
+
+Cache.CollectionEntries:
+
+    Maximum number of collection cache entries.
+
+Cache.CollectionBytes:
+
+    Approximate memory limit for collection cache.
+
+Cache.PermissionEntries:
+
+    Maximum number of permission cache entries.
+
+Cache.UUIDEntries:
+
+    Maximum number of UUID cache entries.
+
 `, exampleConfigFile)
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list