[arvados] created: 2.5.0-286-gdae3da4d7

git repository hosting git at public.arvados.org
Wed Mar 15 03:37:20 UTC 2023


        at  dae3da4d70bfba901f2147775dd6c07be61416b2 (commit)


commit dae3da4d70bfba901f2147775dd6c07be61416b2
Author: Tom Clegg <tom at curii.com>
Date:   Tue Mar 14 23:35:10 2023 -0400

    20187: Preserve CORS and other misc headers.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index 05b45a0fc..b3d850ac0 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -272,9 +272,9 @@ func (ent *cacheEnt) refresh(path string, do func(*http.Request) (*http.Response
 		return nil, nil, fmt.Errorf("Read error: %w", err)
 	}
 	header := http.Header{}
-	for _, k := range []string{"Content-Type", "Etag", "Last-Modified"} {
-		if v, ok := header[k]; ok {
-			resp.Header[k] = v
+	for k, v := range resp.Header {
+		if !dropHeaders[k] && k != "X-Request-Id" {
+			header[k] = v
 		}
 	}
 	if mediatype, _, err := mime.ParseMediaType(header.Get("Content-Type")); err == nil && mediatype == "application/json" {

commit 1abd17dde53982ed058abf770072123f61ed13af
Author: Tom Clegg <tom at curii.com>
Date:   Thu Mar 9 15:39:00 2023 -0500

    20187: Test discovery doc cache.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index ce3243f6c..05b45a0fc 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -238,7 +238,19 @@ func (ent *cacheEnt) refresh(path string, do func(*http.Request) (*http.Response
 		// another goroutine refreshed successfully while we
 		// were waiting for refreshLock
 		return header, body, nil
+	} else if body != nil {
+		// Cache is present, but expired. We'll try to refresh
+		// below. Meanwhile, other refresh() calls will queue
+		// up for refreshLock -- and we don't want them to
+		// turn into N upstream requests, even if upstream is
+		// failing.  (If we succeed we'll update the expiry
+		// time again below with the real cacheTTL -- this
+		// just takes care of the error case.)
+		ent.mtx.Lock()
+		ent.refreshAfter = time.Now().Add(time.Second)
+		ent.mtx.Unlock()
 	}
+
 	ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Minute))
 	defer cancel()
 	// 0.0.0.0:0 is just a placeholder here -- do(), which is
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go
index 52bc4f907..4a7e1ad78 100644
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -16,6 +16,7 @@ import (
 	"net/url"
 	"os"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 
@@ -37,11 +38,12 @@ func Test(t *testing.T) {
 var _ = check.Suite(&HandlerSuite{})
 
 type HandlerSuite struct {
-	cluster *arvados.Cluster
-	handler *Handler
-	logbuf  *bytes.Buffer
-	ctx     context.Context
-	cancel  context.CancelFunc
+	cluster  *arvados.Cluster
+	handler  *Handler
+	railsSpy *arvadostest.Proxy
+	logbuf   *bytes.Buffer
+	ctx      context.Context
+	cancel   context.CancelFunc
 }
 
 func (s *HandlerSuite) SetUpTest(c *check.C) {
@@ -55,6 +57,8 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
 	s.cluster.API.RequestTimeout = arvados.Duration(5 * time.Minute)
 	s.cluster.TLS.Insecure = true
 	arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
+	s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+	arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, s.railsSpy.URL.String())
 	arvadostest.SetServiceURL(&s.cluster.Services.Controller, "http://localhost:/")
 	s.handler = newHandler(s.ctx, s.cluster, "", prometheus.NewRegistry()).(*Handler)
 }
@@ -93,6 +97,153 @@ func (s *HandlerSuite) TestConfigExport(c *check.C) {
 	}
 }
 
+func (s *HandlerSuite) TestDiscoveryDocCache(c *check.C) {
+	countRailsReqs := func() int {
+		n := 0
+		for _, req := range s.railsSpy.RequestDumps {
+			if bytes.Contains(req, []byte("/discovery/v1/apis/arvados/v1/rest")) {
+				n++
+			}
+		}
+		return n
+	}
+	getDD := func() int {
+		req := httptest.NewRequest(http.MethodGet, "/discovery/v1/apis/arvados/v1/rest", nil)
+		resp := httptest.NewRecorder()
+		s.handler.ServeHTTP(resp, req)
+		if resp.Code == http.StatusOK {
+			var dd arvados.DiscoveryDocument
+			err := json.Unmarshal(resp.Body.Bytes(), &dd)
+			c.Check(err, check.IsNil)
+			c.Check(dd.Schemas["Collection"].UUIDPrefix, check.Equals, "4zz18")
+		}
+		return resp.Code
+	}
+	getDDConcurrently := func(n int, expectCode int, checkArgs ...interface{}) *sync.WaitGroup {
+		var wg sync.WaitGroup
+		for i := 0; i < n; i++ {
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				c.Check(getDD(), check.Equals, append([]interface{}{expectCode}, checkArgs...)...)
+			}()
+		}
+		return &wg
+	}
+	clearCache := func() {
+		for path := range s.handler.cache {
+			s.handler.cache[path] = &cacheEnt{}
+		}
+	}
+	expireCache := func() {
+		for _, ent := range s.handler.cache {
+			ent.refreshAfter = time.Now()
+		}
+	}
+	waitPendingUpdates := func() {
+		for _, ent := range s.handler.cache {
+			ent.refreshLock.Lock()
+			defer ent.refreshLock.Unlock()
+			ent.mtx.Lock()
+			defer ent.mtx.Unlock()
+		}
+	}
+
+	// Easy path: first req fetches, subsequent reqs use cache.
+	c.Check(countRailsReqs(), check.Equals, 0)
+	c.Check(getDD(), check.Equals, http.StatusOK)
+	c.Check(countRailsReqs(), check.Equals, 1)
+	c.Check(getDD(), check.Equals, http.StatusOK)
+	c.Check(countRailsReqs(), check.Equals, 1)
+	c.Check(getDD(), check.Equals, http.StatusOK)
+	c.Check(countRailsReqs(), check.Equals, 1)
+
+	// To guarantee we have concurrent requests, we set up
+	// railsSpy to hold up the Handler's outgoing requests until
+	// we send to (or close) holdReqs.
+	holdReqs := make(chan struct{})
+	s.railsSpy.Director = func(*http.Request) {
+		<-holdReqs
+	}
+
+	// Race at startup: first req fetches, other concurrent reqs
+	// wait for the initial fetch to complete, then all return.
+	clearCache()
+	reqsBefore := countRailsReqs()
+	wg := getDDConcurrently(5, http.StatusOK, check.Commentf("race at startup"))
+	close(holdReqs)
+	wg.Wait()
+	c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+	// Race after expiry: concurrent reqs return the cached data
+	// but initiate a new fetch in the background.
+	expireCache()
+	holdReqs = make(chan struct{})
+	wg = getDDConcurrently(5, http.StatusOK, check.Commentf("race after expiry"))
+	reqsBefore = countRailsReqs()
+	close(holdReqs)
+	wg.Wait()
+	for deadline := time.Now().Add(time.Second); time.Now().Before(deadline) && countRailsReqs() < reqsBefore+1; {
+		time.Sleep(time.Second / 100)
+	}
+	c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+	// Configure railsSpy to return an error when wantError==true.
+	var wantError bool
+	s.railsSpy.Director = func(req *http.Request) {
+		if wantError {
+			req.Method = "MAKE-COFFEE"
+		}
+	}
+
+	// Error at startup (empty cache) => caller gets error, and we
+	// make an upstream attempt for each incoming request because
+	// we have nothing better to return
+	clearCache()
+	wantError = true
+	reqsBefore = countRailsReqs()
+	holdReqs = make(chan struct{})
+	wg = getDDConcurrently(5, http.StatusBadGateway, check.Commentf("error at startup"))
+	close(holdReqs)
+	wg.Wait()
+	c.Check(countRailsReqs(), check.Equals, reqsBefore+5)
+
+	// Error condition clears => caller gets OK, cache is warmed
+	// up
+	wantError = false
+	reqsBefore = countRailsReqs()
+	getDDConcurrently(5, http.StatusOK, check.Commentf("success after errors at startup")).Wait()
+	c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+	// Error with warm cache => caller gets OK (with no attempt to
+	// re-fetch)
+	wantError = true
+	reqsBefore = countRailsReqs()
+	getDDConcurrently(5, http.StatusOK, check.Commentf("error with warm cache")).Wait()
+	c.Check(countRailsReqs(), check.Equals, reqsBefore)
+	expireCache()
+
+	// Error with expired cache => caller gets OK with stale data
+	// while the re-fetch is attempted in the background
+	reqsBefore = countRailsReqs()
+	holdReqs = make(chan struct{})
+	getDDConcurrently(5, http.StatusOK, check.Commentf("error with expired cache")).Wait()
+	close(holdReqs)
+	// Only one attempt to re-fetch (holdReqs ensured the first
+	// update took long enough for the last incoming request to
+	// arrive)
+	c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+
+	waitPendingUpdates()
+	expireCache()
+	wantError = false
+	reqsBefore = countRailsReqs()
+	holdReqs = make(chan struct{})
+	getDDConcurrently(5, http.StatusOK, check.Commentf("refresh cache after error condition clears")).Wait()
+	close(holdReqs)
+	c.Check(countRailsReqs(), check.Equals, reqsBefore+1)
+}
+
 func (s *HandlerSuite) TestVocabularyExport(c *check.C) {
 	voc := `{
 		"strict_tags": false,
diff --git a/sdk/go/arvadostest/proxy.go b/sdk/go/arvadostest/proxy.go
index 48700d8b1..9940ddd3d 100644
--- a/sdk/go/arvadostest/proxy.go
+++ b/sdk/go/arvadostest/proxy.go
@@ -26,6 +26,10 @@ type Proxy struct {
 
 	// A dump of each request that has been proxied.
 	RequestDumps [][]byte
+
+	// If non-nil, func will be called on each incoming request
+	// before proxying it.
+	Director func(*http.Request)
 }
 
 // NewProxy returns a new Proxy that saves a dump of each reqeust
@@ -63,6 +67,9 @@ func NewProxy(c *check.C, svc arvados.Service) *Proxy {
 		URL:    u,
 	}
 	rp.Director = func(r *http.Request) {
+		if proxy.Director != nil {
+			proxy.Director(r)
+		}
 		dump, _ := httputil.DumpRequest(r, true)
 		proxy.RequestDumps = append(proxy.RequestDumps, dump)
 		r.URL.Scheme = target.Scheme

commit dceebc9953147b08b68a36330e213f7280851450
Author: Tom Clegg <tom at curii.com>
Date:   Thu Mar 9 09:27:30 2023 -0500

    20187: 1 minute timeout for getting discovery doc from rails.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index c3bbfd2d9..ce3243f6c 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -239,10 +239,12 @@ func (ent *cacheEnt) refresh(path string, do func(*http.Request) (*http.Response
 		// were waiting for refreshLock
 		return header, body, nil
 	}
-	// 0.0.0.0 is just a placeholder here -- do(), which is
+	ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Minute))
+	defer cancel()
+	// 0.0.0.0:0 is just a placeholder here -- do(), which is
 	// localClusterRequest(), will replace the scheme and host
 	// parts with the real proxy destination.
-	req, err := http.NewRequest(http.MethodGet, "http://0.0.0.0/"+path, nil)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://0.0.0.0:0/"+path, nil)
 	if err != nil {
 		return nil, nil, err
 	}

commit 04826743ea647f67fef414761a4ca2536523226b
Author: Tom Clegg <tom at curii.com>
Date:   Wed Mar 8 13:35:35 2023 -0500

    20187: Update test.
    
    Discovery doc can no longer be counted on to proxy through to rails.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go
index 76eab9ca1..52bc4f907 100644
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -210,7 +210,7 @@ func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
 // etc.
 func (s *HandlerSuite) TestRequestCancel(c *check.C) {
 	ctx, cancel := context.WithCancel(context.Background())
-	req := httptest.NewRequest("GET", "/discovery/v1/apis/arvados/v1/rest", nil).WithContext(ctx)
+	req := httptest.NewRequest("GET", "/static/login_failure", nil).WithContext(ctx)
 	resp := httptest.NewRecorder()
 	cancel()
 	s.handler.ServeHTTP(resp, req)

commit 4334ff2b99833bb31c1228cececfbcab451f5511
Author: Tom Clegg <tom at curii.com>
Date:   Wed Mar 8 12:51:00 2023 -0500

    20187: Validate and cache discovery doc in controller.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index 4810ec3c2..c3bbfd2d9 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -6,12 +6,17 @@ package controller
 
 import (
 	"context"
+	"encoding/json"
+	"errors"
 	"fmt"
+	"io/ioutil"
+	"mime"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
 	"strings"
 	"sync"
+	"time"
 
 	"git.arvados.org/arvados.git/lib/controller/api"
 	"git.arvados.org/arvados.git/lib/controller/federation"
@@ -20,6 +25,7 @@ import (
 	"git.arvados.org/arvados.git/lib/controller/router"
 	"git.arvados.org/arvados.git/lib/ctrlctx"
 	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 	"git.arvados.org/arvados.git/sdk/go/health"
 	"git.arvados.org/arvados.git/sdk/go/httpserver"
 
@@ -39,6 +45,8 @@ type Handler struct {
 	insecureClient *http.Client
 	dbConnector    ctrlctx.DBConnector
 	limitLogCreate chan struct{}
+
+	cache map[string]*cacheEnt
 }
 
 func (h *Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
@@ -162,6 +170,9 @@ func (h *Handler) setup() {
 	h.proxy = &proxy{
 		Name: "arvados-controller",
 	}
+	h.cache = map[string]*cacheEnt{
+		"/discovery/v1/apis/arvados/v1/rest": &cacheEnt{},
+	}
 
 	go h.trashSweepWorker()
 	go h.containerLogSweepWorker()
@@ -208,7 +219,100 @@ func (h *Handler) limitLogCreateRequests(w http.ResponseWriter, req *http.Reques
 	next.ServeHTTP(w, req)
 }
 
+// cacheEnt implements a basic stale-while-revalidate cache, suitable
+// for the Arvados discovery document.
+type cacheEnt struct {
+	mtx          sync.Mutex
+	header       http.Header
+	body         []byte
+	refreshAfter time.Time
+	refreshLock  sync.Mutex
+}
+
+const cacheTTL = 5 * time.Minute
+
+func (ent *cacheEnt) refresh(path string, do func(*http.Request) (*http.Response, error)) (http.Header, []byte, error) {
+	ent.refreshLock.Lock()
+	defer ent.refreshLock.Unlock()
+	if header, body, needRefresh := ent.response(); !needRefresh {
+		// another goroutine refreshed successfully while we
+		// were waiting for refreshLock
+		return header, body, nil
+	}
+	// 0.0.0.0 is just a placeholder here -- do(), which is
+	// localClusterRequest(), will replace the scheme and host
+	// parts with the real proxy destination.
+	req, err := http.NewRequest(http.MethodGet, "http://0.0.0.0/"+path, nil)
+	if err != nil {
+		return nil, nil, err
+	}
+	resp, err := do(req)
+	if err != nil {
+		return nil, nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, nil, fmt.Errorf("HTTP status %d", resp.StatusCode)
+	}
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return nil, nil, fmt.Errorf("Read error: %w", err)
+	}
+	header := http.Header{}
+	for _, k := range []string{"Content-Type", "Etag", "Last-Modified"} {
+		if v, ok := header[k]; ok {
+			resp.Header[k] = v
+		}
+	}
+	if mediatype, _, err := mime.ParseMediaType(header.Get("Content-Type")); err == nil && mediatype == "application/json" {
+		if !json.Valid(body) {
+			return nil, nil, errors.New("invalid JSON encoding in response")
+		}
+	}
+	ent.mtx.Lock()
+	defer ent.mtx.Unlock()
+	ent.header = header
+	ent.body = body
+	ent.refreshAfter = time.Now().Add(cacheTTL)
+	return ent.header, ent.body, nil
+}
+
+func (ent *cacheEnt) response() (http.Header, []byte, bool) {
+	ent.mtx.Lock()
+	defer ent.mtx.Unlock()
+	return ent.header, ent.body, ent.refreshAfter.Before(time.Now())
+}
+
+func (ent *cacheEnt) ServeHTTP(ctx context.Context, w http.ResponseWriter, path string, do func(*http.Request) (*http.Response, error)) {
+	header, body, needRefresh := ent.response()
+	if body == nil {
+		// need to fetch before we can return anything
+		var err error
+		header, body, err = ent.refresh(path, do)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusBadGateway)
+			return
+		}
+	} else if needRefresh {
+		// re-fetch in background
+		go func() {
+			_, _, err := ent.refresh(path, do)
+			if err != nil {
+				ctxlog.FromContext(ctx).WithError(err).WithField("path", path).Warn("error refreshing cache")
+			}
+		}()
+	}
+	for k, v := range header {
+		w.Header()[k] = v
+	}
+	w.WriteHeader(http.StatusOK)
+	w.Write(body)
+}
+
 func (h *Handler) proxyRailsAPI(w http.ResponseWriter, req *http.Request, next http.Handler) {
+	if ent, ok := h.cache[req.URL.Path]; ok && req.Method == http.MethodGet {
+		ent.ServeHTTP(req.Context(), w, req.URL.Path, h.localClusterRequest)
+		return
+	}
 	resp, err := h.localClusterRequest(req)
 	n, err := h.proxy.ForwardResponse(w, resp, err)
 	if err != nil {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list