[ARVADOS] created: 2.1.0-1185-g516669845

Git user git at public.arvados.org
Tue Aug 17 18:22:30 UTC 2021


        at  51666984555f2aaf7080ca5f4f9ae8e05f51a74d (commit)


commit 51666984555f2aaf7080ca5f4f9ae8e05f51a74d
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Aug 17 15:20:59 2021 -0300

    18004: Fixes a couple of race condition bugs related to caching remote users.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 8a12f1d2f..685144205 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -189,7 +189,7 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
 }
 
 // Tests bug #18004
-func (s *IntegrationSuite) TestRemoteTokenCacheRace(c *check.C) {
+func (s *IntegrationSuite) TestRemoteUserAndTokenCacheRace(c *check.C) {
 	conn1 := s.testClusters["z1111"].Conn()
 	rootctx1, _, _ := s.testClusters["z1111"].RootClients()
 	rootctx2, _, _ := s.testClusters["z2222"].RootClients()
@@ -208,7 +208,7 @@ func (s *IntegrationSuite) TestRemoteTokenCacheRace(c *check.C) {
 			defer wg2.Done()
 			wg1.Wait()
 			_, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{})
-			c.Check(err, check.IsNil)
+			c.Check(err, check.IsNil, check.Commentf("warm up phase failed"))
 		}()
 	}
 	wg1.Done()
@@ -224,7 +224,7 @@ func (s *IntegrationSuite) TestRemoteTokenCacheRace(c *check.C) {
 			wg1.Wait()
 			// Retrieve the remote collection from cluster z2222.
 			_, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{})
-			c.Check(err, check.IsNil)
+			c.Check(err, check.IsNil, check.Commentf("testing phase failed"))
 		}()
 	}
 	wg1.Done()
diff --git a/services/api/app/models/api_client_authorization.rb b/services/api/app/models/api_client_authorization.rb
index 52f2cee06..7c7ed759c 100644
--- a/services/api/app/models/api_client_authorization.rb
+++ b/services/api/app/models/api_client_authorization.rb
@@ -319,7 +319,17 @@ class ApiClientAuthorization < ArvadosModel
         user.last_name = "from cluster #{remote_user_prefix}"
       end
 
-      user.save!
+      begin
+        user.save!
+      rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique
+        Rails.logger.debug("remote user #{remote_user['uuid']} already exists, retrying...")
+        # Some other request won the race: retry fetching the user record.
+        user = User.find_by_uuid(remote_user['uuid'])
+        if !user
+          Rails.logger.warn("cannot find or create remote user #{remote_user['uuid']}")
+          return nil
+        end
+      end
 
       if user.is_invited && !remote_user['is_invited']
         # Remote user is not "invited" state, they should be unsetup, which
@@ -364,12 +374,24 @@ class ApiClientAuthorization < ArvadosModel
       exp = [db_current_time + Rails.configuration.Login.RemoteTokenRefresh,
              remote_token.andand['expires_at']].compact.min
       scopes = remote_token.andand['scopes'] || ['all']
-      auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
-        auth.user = user
-        auth.api_token = stored_secret
-        auth.api_client_id = 0
-        auth.scopes = scopes
-        auth.expires_at = exp
+      begin
+        retries ||= 0
+        auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
+          auth.user = user
+          auth.api_token = stored_secret
+          auth.api_client_id = 0
+          auth.scopes = scopes
+          auth.expires_at = exp
+        end
+      rescue ActiveRecord::RecordNotUnique
+        Rails.logger.debug("cached remote token #{token_uuid} already exists, retrying...")
+        # Some other request won the race: retry just once before erroring out
+        if (retries += 1) <= 1
+          retry
+        else
+          Rails.logger.warn("cannot find or create cached remote token #{token_uuid}")
+          return nil
+        end
       end
       auth.update_attributes!(user: user,
                               api_token: stored_secret,

commit e51a4e1e019c085f428d90780d9a45ffab68cf57
Author: Lucas Di Pentima <lucas.dipentima at curii.com>
Date:   Tue Aug 10 12:15:04 2021 -0300

    18004: Adds test exposing the race condition.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima at curii.com>

diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 26f0dbb0d..8a12f1d2f 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -20,6 +20,7 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
 
 	"git.arvados.org/arvados.git/lib/boot"
 	"git.arvados.org/arvados.git/lib/config"
@@ -187,6 +188,49 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
 	c.Check(coll.PortableDataHash, check.Equals, pdh)
 }
 
+// Tests bug #18004
+func (s *IntegrationSuite) TestRemoteTokenCacheRace(c *check.C) {
+	conn1 := s.testClusters["z1111"].Conn()
+	rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+	rootctx2, _, _ := s.testClusters["z2222"].RootClients()
+	conn2 := s.testClusters["z2222"].Conn()
+	userctx1, _, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user2 at example.com", true)
+
+	var wg1, wg2 sync.WaitGroup
+	creqs := 100
+
+	// Make concurrent requests to z2222 with a local token to make sure more
+	// than one worker is listening.
+	wg1.Add(1)
+	for i := 0; i < creqs; i++ {
+		wg2.Add(1)
+		go func() {
+			defer wg2.Done()
+			wg1.Wait()
+			_, err := conn2.UserGetCurrent(rootctx2, arvados.GetOptions{})
+			c.Check(err, check.IsNil)
+		}()
+	}
+	wg1.Done()
+	wg2.Wait()
+
+	// Real test pass -- use a new remote token than the one used in the warm-up
+	// phase.
+	wg1.Add(1)
+	for i := 0; i < creqs; i++ {
+		wg2.Add(1)
+		go func() {
+			defer wg2.Done()
+			wg1.Wait()
+			// Retrieve the remote collection from cluster z2222.
+			_, err := conn2.UserGetCurrent(userctx1, arvados.GetOptions{})
+			c.Check(err, check.IsNil)
+		}()
+	}
+	wg1.Done()
+	wg2.Wait()
+}
+
 func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) {
 	if _, err := exec.LookPath("s3cmd"); err != nil {
 		c.Skip("s3cmd not in PATH")
@@ -502,7 +546,7 @@ func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
 }
 
 // We test the direct access to the database
-// normally an integration test would not have a database access, but  in this case we need
+// normally an integration test would not have a database access, but in this case we need
 // to test tokens that are secret, so there is no API response that will give them back
 func (s *IntegrationSuite) dbConn(c *check.C, clusterID string) (*sql.DB, *sql.Conn) {
 	ctx := context.Background()

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list