[ARVADOS] updated: 2.3.0-35-g82691f82a

Git user git at public.arvados.org
Mon Nov 22 18:58:16 UTC 2021


Summary of changes:
 doc/admin/upgrading.html.textile.liquid            |   8 ++
 lib/config/config.default.yml                      |   2 +-
 lib/config/generated_config.go                     |   2 +-
 lib/controller/auth_test.go                        |   2 +-
 lib/controller/cmd.go                              |   4 +-
 lib/controller/dblock/dblock.go                    | 101 +++++++++++++++
 lib/controller/federation/conn.go                  |   4 +
 lib/controller/federation_test.go                  |   2 +-
 lib/controller/handler.go                          |   9 +-
 lib/controller/handler_test.go                     |  39 +++++-
 lib/controller/rpc/conn.go                         |   7 ++
 lib/controller/server_test.go                      |  19 +--
 lib/controller/trash.go                            |  33 +++++
 lib/lsf/dispatch.go                                |  44 +++++--
 lib/lsf/dispatch_test.go                           |  94 ++++++++++++--
 lib/lsf/lsfcli.go                                  |  32 ++---
 lib/lsf/lsfqueue.go                                |  10 +-
 sdk/go/arvados/api.go                              |   2 +
 sdk/go/arvados/client.go                           |   2 +
 sdk/go/arvadostest/api.go                          |   4 +
 sdk/go/arvadostest/api_test.go                     |  10 ++
 .../controllers/arvados/v1/schema_controller.rb    |  21 ++++
 .../controllers/sys_controller.rb}                 |  77 +++++-------
 services/api/app/models/collection.rb              |   6 -
 services/api/config/routes.rb                      |   2 +
 .../api/test/functional/sys_controller_test.rb     | 135 +++++++++++++++++++++
 services/api/test/integration/errors_test.rb       |   2 +-
 .../api/test/unit/api_client_authorization_test.rb |   7 --
 services/api/test/unit/collection_test.rb          |  55 ---------
 services/api/test/unit/group_test.rb               |  44 -------
 30 files changed, 546 insertions(+), 233 deletions(-)
 create mode 100644 lib/controller/dblock/dblock.go
 create mode 100644 lib/controller/trash.go
 create mode 100644 sdk/go/arvadostest/api_test.go
 rename services/api/{lib/sweep_trashed_objects.rb => app/controllers/sys_controller.rb} (55%)
 create mode 100644 services/api/test/functional/sys_controller_test.rb

       via  82691f82adecc3baf60b392b2d295ab2381f85bc (commit)
       via  a48908445762d574b41d611021a537c805f7f3ad (commit)
       via  771804a86a5ac53be1142735995dbec6f6949289 (commit)
      from  ba3dfca2da03a57a5f732dd6fb7bbaf744add9a5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 82691f82adecc3baf60b392b2d295ab2381f85bc
Author: Tom Clegg <tom at curii.com>
Date:   Mon Nov 22 13:45:21 2021 -0500

    Merge branch '18298-lsf-no-suitable-hosts'
    
    refs #18298
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index 39d9b79a7..fbf25fa7a 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -39,6 +39,10 @@ h2(#v2_3_1). v2.3.1 (2021-11-19)
 
 "previous: Upgrading to 2.3.0":#v2_3_0
 
+h3. Default LSF arguments have changed
+
+If you use LSF and your configuration specifies @Containers.LSF.BsubArgumentsList@, you should update it to include the new arguments (@"-R", "select[mem>=%MMB]", ...@, see "configuration reference":{{site.baseurl}}/admin/config.html). Otherwise, containers that are too big to run on any LSF host will remain in the LSF queue instead of being cancelled.
+
 h3. Previously trashed role groups will be deleted
 
 Due to a bug in previous versions, the @DELETE@ operation on a role group caused the group to be flagged as trash in the database, but continue to grant permissions regardless. After upgrading, any role groups that had been trashed this way will be deleted. This might surprise some users if they were relying on permissions that were still in effect due to this bug. Future @DELETE@ operations on a role group will immediately delete the group and revoke the associated permissions.
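For readers of the LSF note above, here is an illustrative sketch (in Go, loosely mirroring the expectations in lib/lsf/dispatch_test.go) of the argument list the new default BsubArgumentsList would produce for one hypothetical container with 4 VCPUs and an 11701 MB memory request; the container UUID below is made up, and the exact numbers depend on each container's runtime constraints:

// Illustrative only: bsub arguments generated by the new default
// BsubArgumentsList for a hypothetical container (4 VCPUs, 11701 MB
// RAM, no scratch). %U/%C/%MMB/%TMB are substituted per container.
package main

import "fmt"

func main() {
	args := []string{
		"-J", "zzzzz-dz642-examplecontainr", // %U: container UUID (made up)
		"-n", "4",       // %C: VCPUs
		"-D", "11701MB", // %MMB: RAM request
		"-R", "rusage[mem=11701MB:tmp=0MB] span[hosts=1]",
		"-R", "select[mem>=11701MB]", // new: exclude hosts with too little RAM
		"-R", "select[tmp>=0MB]",     // new: exclude hosts with too little /tmp
		"-R", "select[ncpus>=4]",     // new: exclude hosts with too few CPUs
	}
	fmt.Println(args)
}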

commit a48908445762d574b41d611021a537c805f7f3ad
Author: Tom Clegg <tom at curii.com>
Date:   Fri Nov 19 17:29:50 2021 -0500

    Merge branch '18298-lsf-no-suitable-hosts'
    
    refs #18298
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index bbdbe6ab9..4e5fa705f 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -1053,7 +1053,7 @@ Clusters:
         # in /tmp on the compute node each time an Arvados container
         # runs. Ensure you have something in place to delete old files
         # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]"]
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
 
         # Use sudo to switch to this user account when submitting LSF
         # jobs.
diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go
index 576eb0c00..b82d94809 100644
--- a/lib/config/generated_config.go
+++ b/lib/config/generated_config.go
@@ -1059,7 +1059,7 @@ Clusters:
         # in /tmp on the compute node each time an Arvados container
         # runs. Ensure you have something in place to delete old files
         # from /tmp, or adjust the "-o" and "-e" arguments accordingly.
-        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]"]
+        BsubArgumentsList: ["-o", "/tmp/crunch-run.%%J.out", "-e", "/tmp/crunch-run.%%J.err", "-J", "%U", "-n", "%C", "-D", "%MMB", "-R", "rusage[mem=%MMB:tmp=%TMB] span[hosts=1]", "-R", "select[mem>=%MMB]", "-R", "select[tmp>=%TMB]", "-R", "select[ncpus>=%C]"]
 
         # Use sudo to switch to this user account when submitting LSF
         # jobs.
diff --git a/lib/lsf/dispatch.go b/lib/lsf/dispatch.go
index 6e35b7de9..537d52a07 100644
--- a/lib/lsf/dispatch.go
+++ b/lib/lsf/dispatch.go
@@ -167,7 +167,7 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 
 	if ctr.State != dispatch.Locked {
 		// already started by prior invocation
-	} else if _, ok := disp.lsfqueue.JobID(ctr.UUID); !ok {
+	} else if _, ok := disp.lsfqueue.Lookup(ctr.UUID); !ok {
 		disp.logger.Printf("Submitting container %s to LSF", ctr.UUID)
 		cmd := []string{disp.Cluster.Containers.CrunchRunCommand}
 		cmd = append(cmd, "--runtime-engine="+disp.Cluster.Containers.RuntimeEngine)
@@ -181,16 +181,38 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 	disp.logger.Printf("Start monitoring container %v in state %q", ctr.UUID, ctr.State)
 	defer disp.logger.Printf("Done monitoring container %s", ctr.UUID)
 
-	// If the container disappears from the lsf queue, there is
-	// no point in waiting for further dispatch updates: just
-	// clean up and return.
 	go func(uuid string) {
+		cancelled := false
 		for ctx.Err() == nil {
-			if _, ok := disp.lsfqueue.JobID(uuid); !ok {
+			qent, ok := disp.lsfqueue.Lookup(uuid)
+			if !ok {
+				// If the container disappears from
+				// the lsf queue, there is no point in
+				// waiting for further dispatch
+				// updates: just clean up and return.
 				disp.logger.Printf("container %s job disappeared from LSF queue", uuid)
 				cancel()
 				return
 			}
+			if !cancelled && qent.Stat == "PEND" && strings.Contains(qent.PendReason, "There are no suitable hosts for the job") {
+				disp.logger.Printf("container %s: %s", uuid, qent.PendReason)
+				err := disp.arvDispatcher.Arv.Update("containers", uuid, arvadosclient.Dict{
+					"container": map[string]interface{}{
+						"runtime_status": map[string]string{
+							"error": qent.PendReason,
+						},
+					},
+				}, nil)
+				if err != nil {
+					disp.logger.Printf("error setting runtime_status on %s: %s", uuid, err)
+					continue // retry
+				}
+				err = disp.arvDispatcher.UpdateState(uuid, dispatch.Cancelled)
+				if err != nil {
+					continue // retry (UpdateState() already logged the error)
+				}
+				cancelled = true
+			}
 		}
 	}(ctr.UUID)
 
@@ -236,10 +258,10 @@ func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Contain
 	// from the queue.
 	ticker := time.NewTicker(5 * time.Second)
 	defer ticker.Stop()
-	for jobid, ok := disp.lsfqueue.JobID(ctr.UUID); ok; _, ok = disp.lsfqueue.JobID(ctr.UUID) {
-		err := disp.lsfcli.Bkill(jobid)
+	for qent, ok := disp.lsfqueue.Lookup(ctr.UUID); ok; _, ok = disp.lsfqueue.Lookup(ctr.UUID) {
+		err := disp.lsfcli.Bkill(qent.ID)
 		if err != nil {
-			disp.logger.Warnf("%s: bkill(%d): %s", ctr.UUID, jobid, err)
+			disp.logger.Warnf("%s: bkill(%s): %s", ctr.UUID, qent.ID, err)
 		}
 		<-ticker.C
 	}
@@ -262,10 +284,10 @@ func (disp *dispatcher) submit(container arvados.Container, crunchRunCommand []s
 }
 
 func (disp *dispatcher) bkill(ctr arvados.Container) {
-	if jobid, ok := disp.lsfqueue.JobID(ctr.UUID); !ok {
+	if qent, ok := disp.lsfqueue.Lookup(ctr.UUID); !ok {
 		disp.logger.Debugf("bkill(%s): redundant, job not in queue", ctr.UUID)
-	} else if err := disp.lsfcli.Bkill(jobid); err != nil {
-		disp.logger.Warnf("%s: bkill(%d): %s", ctr.UUID, jobid, err)
+	} else if err := disp.lsfcli.Bkill(qent.ID); err != nil {
+		disp.logger.Warnf("%s: bkill(%s): %s", ctr.UUID, qent.ID, err)
 	}
 }
 
diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go
index 641453e54..c044df09f 100644
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go
@@ -6,6 +6,7 @@ package lsf
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"math/rand"
 	"os/exec"
@@ -29,7 +30,8 @@ func Test(t *testing.T) {
 var _ = check.Suite(&suite{})
 
 type suite struct {
-	disp *dispatcher
+	disp     *dispatcher
+	crTooBig arvados.ContainerRequest
 }
 
 func (s *suite) TearDownTest(c *check.C) {
@@ -46,6 +48,22 @@ func (s *suite) SetUpTest(c *check.C) {
 	s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
 		return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
 	}
+	err = arvados.NewClientFromEnv().RequestAndDecode(&s.crTooBig, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{
+		"container_request": map[string]interface{}{
+			"runtime_constraints": arvados.RuntimeConstraints{
+				RAM:   1000000000000,
+				VCPUs: 1,
+			},
+			"container_image":     arvadostest.DockerImage112PDH,
+			"command":             []string{"sleep", "1"},
+			"mounts":              map[string]arvados.Mount{"/mnt/out": {Kind: "tmp", Capacity: 1000}},
+			"output_path":         "/mnt/out",
+			"state":               arvados.ContainerRequestStateCommitted,
+			"priority":            1,
+			"container_count_max": 1,
+		},
+	})
+	c.Assert(err, check.IsNil)
 }
 
 type lsfstub struct {
@@ -82,7 +100,10 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 					"-J", arvadostest.LockedContainerUUID,
 					"-n", "4",
 					"-D", "11701MB",
-					"-R", "rusage[mem=11701MB:tmp=0MB] span[hosts=1]"})
+					"-R", "rusage[mem=11701MB:tmp=0MB] span[hosts=1]",
+					"-R", "select[mem>=11701MB]",
+					"-R", "select[tmp>=0MB]",
+					"-R", "select[ncpus>=4]"})
 				mtx.Lock()
 				fakejobq[nextjobid] = args[1]
 				nextjobid++
@@ -92,7 +113,23 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 					"-J", arvadostest.QueuedContainerUUID,
 					"-n", "4",
 					"-D", "11701MB",
-					"-R", "rusage[mem=11701MB:tmp=45777MB] span[hosts=1]"})
+					"-R", "rusage[mem=11701MB:tmp=45777MB] span[hosts=1]",
+					"-R", "select[mem>=11701MB]",
+					"-R", "select[tmp>=45777MB]",
+					"-R", "select[ncpus>=4]"})
+				mtx.Lock()
+				fakejobq[nextjobid] = args[1]
+				nextjobid++
+				mtx.Unlock()
+			case s.crTooBig.ContainerUUID:
+				c.Check(args, check.DeepEquals, []string{
+					"-J", s.crTooBig.ContainerUUID,
+					"-n", "1",
+					"-D", "954187MB",
+					"-R", "rusage[mem=954187MB:tmp=256MB] span[hosts=1]",
+					"-R", "select[mem>=954187MB]",
+					"-R", "select[tmp>=256MB]",
+					"-R", "select[ncpus>=1]"})
 				mtx.Lock()
 				fakejobq[nextjobid] = args[1]
 				nextjobid++
@@ -103,13 +140,31 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
 			}
 			return exec.Command("echo", "submitted job")
 		case "bjobs":
-			c.Check(args, check.DeepEquals, []string{"-u", "all", "-noheader", "-o", "jobid stat job_name:30"})
-			out := ""
+			c.Check(args, check.DeepEquals, []string{"-u", "all", "-o", "jobid stat job_name pend_reason", "-json"})
+			var records []map[string]interface{}
 			for jobid, uuid := range fakejobq {
-				out += fmt.Sprintf(`%d %s %s\n`, jobid, "RUN", uuid)
+				stat, reason := "RUN", ""
+				if uuid == s.crTooBig.ContainerUUID {
+					// The real bjobs output includes a trailing ';' here:
+					stat, reason = "PEND", "There are no suitable hosts for the job;"
+				}
+				records = append(records, map[string]interface{}{
+					"JOBID":       fmt.Sprintf("%d", jobid),
+					"STAT":        stat,
+					"JOB_NAME":    uuid,
+					"PEND_REASON": reason,
+				})
 			}
-			c.Logf("bjobs out: %q", out)
-			return exec.Command("printf", out)
+			out, err := json.Marshal(map[string]interface{}{
+				"COMMAND": "bjobs",
+				"JOBS":    len(fakejobq),
+				"RECORDS": records,
+			})
+			if err != nil {
+				panic(err)
+			}
+			c.Logf("bjobs out: %s", out)
+			return exec.Command("printf", string(out))
 		case "bkill":
 			killid, _ := strconv.Atoi(args[0])
 			if uuid, ok := fakejobq[killid]; !ok {
@@ -137,6 +192,7 @@ func (s *suite) TestSubmit(c *check.C) {
 		sudoUser:  s.disp.Cluster.Containers.LSF.BsubSudoUser,
 	}.stubCommand(s, c)
 	s.disp.Start()
+
 	deadline := time.Now().Add(20 * time.Second)
 	for range time.NewTicker(time.Second).C {
 		if time.Now().After(deadline) {
@@ -144,23 +200,37 @@ func (s *suite) TestSubmit(c *check.C) {
 			break
 		}
 		// "queuedcontainer" should be running
-		if _, ok := s.disp.lsfqueue.JobID(arvadostest.QueuedContainerUUID); !ok {
+		if _, ok := s.disp.lsfqueue.Lookup(arvadostest.QueuedContainerUUID); !ok {
 			continue
 		}
 		// "lockedcontainer" should be cancelled because it
 		// has priority 0 (no matching container requests)
-		if _, ok := s.disp.lsfqueue.JobID(arvadostest.LockedContainerUUID); ok {
+		if _, ok := s.disp.lsfqueue.Lookup(arvadostest.LockedContainerUUID); ok {
+			continue
+		}
+		// "crTooBig" should be cancelled because lsf stub
+		// reports there is no suitable instance type
+		if _, ok := s.disp.lsfqueue.Lookup(s.crTooBig.ContainerUUID); ok {
 			continue
 		}
 		var ctr arvados.Container
 		if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
 			c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
 			continue
-		}
-		if ctr.State != arvados.ContainerStateQueued {
+		} else if ctr.State != arvados.ContainerStateQueued {
 			c.Logf("LockedContainer is not in the LSF queue but its arvados record has not been updated to state==Queued (state is %q)", ctr.State)
 			continue
 		}
+
+		if err := s.disp.arvDispatcher.Arv.Get("containers", s.crTooBig.ContainerUUID, nil, &ctr); err != nil {
+			c.Logf("error getting container state for %s: %s", s.crTooBig.ContainerUUID, err)
+			continue
+		} else if ctr.State != arvados.ContainerStateCancelled {
+			c.Logf("container %s is not in the LSF queue but its arvados record has not been updated to state==Cancelled (state is %q)", s.crTooBig.ContainerUUID, ctr.State)
+			continue
+		} else {
+			c.Check(ctr.RuntimeStatus["error"], check.Equals, "There are no suitable hosts for the job;")
+		}
 		c.Log("reached desired state")
 		break
 	}
diff --git a/lib/lsf/lsfcli.go b/lib/lsf/lsfcli.go
index 9d712ee97..d17559568 100644
--- a/lib/lsf/lsfcli.go
+++ b/lib/lsf/lsfcli.go
@@ -6,6 +6,7 @@ package lsf
 
 import (
 	"bytes"
+	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
@@ -16,9 +17,10 @@ import (
 )
 
 type bjobsEntry struct {
-	id   int
-	name string
-	stat string
+	ID         string `json:"JOBID"`
+	Name       string `json:"JOB_NAME"`
+	Stat       string `json:"STAT"`
+	PendReason string `json:"PEND_REASON"`
 }
 
 type lsfcli struct {
@@ -53,29 +55,21 @@ func (cli lsfcli) Bsub(script []byte, args []string, arv *arvados.Client) error
 
 func (cli lsfcli) Bjobs() ([]bjobsEntry, error) {
 	cli.logger.Debugf("Bjobs()")
-	cmd := cli.command("bjobs", "-u", "all", "-noheader", "-o", "jobid stat job_name:30")
+	cmd := cli.command("bjobs", "-u", "all", "-o", "jobid stat job_name pend_reason", "-json")
 	buf, err := cmd.Output()
 	if err != nil {
 		return nil, errWithStderr(err)
 	}
-	var bjobs []bjobsEntry
-	for _, line := range strings.Split(string(buf), "\n") {
-		if line == "" {
-			continue
-		}
-		var ent bjobsEntry
-		if _, err := fmt.Sscan(line, &ent.id, &ent.stat, &ent.name); err != nil {
-			cli.logger.Warnf("ignoring unparsed line in bjobs output: %q", line)
-			continue
-		}
-		bjobs = append(bjobs, ent)
+	var resp struct {
+		Records []bjobsEntry `json:"RECORDS"`
 	}
-	return bjobs, nil
+	err = json.Unmarshal(buf, &resp)
+	return resp.Records, err
 }
 
-func (cli lsfcli) Bkill(id int) error {
-	cli.logger.Infof("Bkill(%d)", id)
-	cmd := cli.command("bkill", fmt.Sprintf("%d", id))
+func (cli lsfcli) Bkill(id string) error {
+	cli.logger.Infof("Bkill(%s)", id)
+	cmd := cli.command("bkill", id)
 	buf, err := cmd.CombinedOutput()
 	if err == nil || strings.Index(string(buf), "already finished") >= 0 {
 		return nil
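
As context for the parsing change above, a standalone sketch of decoding `bjobs -json` output follows. The sample record is fabricated for illustration; it mirrors the RECORDS structure the new Bjobs() expects:

// Standalone sketch (not from this commit) of parsing bjobs -json
// output into the entry type used above. The sample record is made up.
package main

import (
	"encoding/json"
	"fmt"
)

type bjobsEntry struct {
	ID         string `json:"JOBID"`
	Name       string `json:"JOB_NAME"`
	Stat       string `json:"STAT"`
	PendReason string `json:"PEND_REASON"`
}

func main() {
	sample := []byte(`{
	  "COMMAND": "bjobs",
	  "JOBS": 1,
	  "RECORDS": [
	    {"JOBID": "123", "STAT": "PEND",
	     "JOB_NAME": "zzzzz-dz642-queuedcontainer",
	     "PEND_REASON": "There are no suitable hosts for the job;"}
	  ]
	}`)
	var resp struct {
		Records []bjobsEntry `json:"RECORDS"`
	}
	if err := json.Unmarshal(sample, &resp); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", resp.Records[0])
}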
diff --git a/lib/lsf/lsfqueue.go b/lib/lsf/lsfqueue.go
index 3c4fc4cb8..3ed4d0c18 100644
--- a/lib/lsf/lsfqueue.go
+++ b/lib/lsf/lsfqueue.go
@@ -23,12 +23,12 @@ type lsfqueue struct {
 	latest    map[string]bjobsEntry
 }
 
-// JobID waits for the next queue update (so even a job that was only
+// Lookup waits for the next queue update (so even a job that was only
 // submitted a nanosecond ago will show up) and then returns the LSF
-// job ID corresponding to the given container UUID.
-func (q *lsfqueue) JobID(uuid string) (int, bool) {
+// queue information corresponding to the given container UUID.
+func (q *lsfqueue) Lookup(uuid string) (bjobsEntry, bool) {
 	ent, ok := q.getNext()[uuid]
-	return ent.id, ok
+	return ent, ok
 }
 
 // All waits for the next queue update, then returns the names of all
@@ -94,7 +94,7 @@ func (q *lsfqueue) init() {
 			}
 			next := make(map[string]bjobsEntry, len(ents))
 			for _, ent := range ents {
-				next[ent.name] = ent
+				next[ent.Name] = ent
 			}
 			// Replace q.latest and notify all the
 			// goroutines that the "next update" they

commit 771804a86a5ac53be1142735995dbec6f6949289
Author: Tom Clegg <tom at curii.com>
Date:   Thu Nov 18 15:01:06 2021 -0500

    Merge branch '18339-sweep-trash-lock'
    
    fixes #18339
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/admin/upgrading.html.textile.liquid b/doc/admin/upgrading.html.textile.liquid
index d3329f783..39d9b79a7 100644
--- a/doc/admin/upgrading.html.textile.liquid
+++ b/doc/admin/upgrading.html.textile.liquid
@@ -39,6 +39,10 @@ h2(#v2_3_1). v2.3.1 (2021-11-19)
 
 "previous: Upgrading to 2.3.0":#v2_3_0
 
+h3. Previously trashed role groups will be deleted
+
+Due to a bug in previous versions, the @DELETE@ operation on a role group caused the group to be flagged as trash in the database, but continue to grant permissions regardless. After upgrading, any role groups that had been trashed this way will be deleted. This might surprise some users if they were relying on permissions that were still in effect due to this bug. Future @DELETE@ operations on a role group will immediately delete the group and revoke the associated permissions.
+
 h3. Users are visible to other users by default
 
 When a new user is set up (either via @AutoSetupNewUsers@ config or via Workbench admin interface) the user immediately becomes visible to other users. To revert to the previous behavior, where the administrator must add two users to the same group using the Workbench admin interface in order for the users to see each other, change the new @Users.ActivatedUsersAreVisibleToOthers@ config to @false@.
diff --git a/lib/controller/auth_test.go b/lib/controller/auth_test.go
index 175241146..5d477a766 100644
--- a/lib/controller/auth_test.go
+++ b/lib/controller/auth_test.go
@@ -98,7 +98,7 @@ func (s *AuthSuite) SetUpTest(c *check.C) {
 	cluster.Login.OpenIDConnect.AcceptAccessToken = true
 	cluster.Login.OpenIDConnect.AcceptAccessTokenScope = ""
 
-	s.testHandler = &Handler{Cluster: cluster}
+	s.testHandler = &Handler{Cluster: cluster, BackgroundContext: ctxlog.Context(context.Background(), s.log)}
 	s.testServer = newServerFromIntegrationTestEnv(c)
 	s.testServer.Server.BaseContext = func(net.Listener) context.Context {
 		return ctxlog.Context(context.Background(), s.log)
diff --git a/lib/controller/cmd.go b/lib/controller/cmd.go
index 7ab7f5305..96972251a 100644
--- a/lib/controller/cmd.go
+++ b/lib/controller/cmd.go
@@ -16,6 +16,6 @@ import (
 // Command starts a controller service. See cmd/arvados-server/cmd.go
 var Command cmd.Handler = service.Command(arvados.ServiceNameController, newHandler)
 
-func newHandler(_ context.Context, cluster *arvados.Cluster, _ string, _ *prometheus.Registry) service.Handler {
-	return &Handler{Cluster: cluster}
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ string, _ *prometheus.Registry) service.Handler {
+	return &Handler{Cluster: cluster, BackgroundContext: ctx}
 }
diff --git a/lib/controller/dblock/dblock.go b/lib/controller/dblock/dblock.go
new file mode 100644
index 000000000..b0d348870
--- /dev/null
+++ b/lib/controller/dblock/dblock.go
@@ -0,0 +1,101 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dblock
+
+import (
+	"context"
+	"database/sql"
+	"sync"
+	"time"
+
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"github.com/jmoiron/sqlx"
+)
+
+var (
+	TrashSweep = &DBLocker{key: 10001}
+	retryDelay = 5 * time.Second
+)
+
+// DBLocker uses pg_advisory_lock to maintain a cluster-wide lock for
+// a long-running task like "do X every N seconds".
+type DBLocker struct {
+	key   int
+	mtx   sync.Mutex
+	ctx   context.Context
+	getdb func(context.Context) (*sqlx.DB, error)
+	conn  *sql.Conn // != nil if advisory lock has been acquired
+}
+
+// Lock acquires the advisory lock, waiting/reconnecting if needed.
+func (dbl *DBLocker) Lock(ctx context.Context, getdb func(context.Context) (*sqlx.DB, error)) {
+	logger := ctxlog.FromContext(ctx)
+	for ; ; time.Sleep(retryDelay) {
+		dbl.mtx.Lock()
+		if dbl.conn != nil {
+			// Already locked by another caller in this
+			// process. Wait for them to release.
+			dbl.mtx.Unlock()
+			continue
+		}
+		db, err := getdb(ctx)
+		if err != nil {
+			logger.WithError(err).Infof("error getting database pool")
+			dbl.mtx.Unlock()
+			continue
+		}
+		conn, err := db.Conn(ctx)
+		if err != nil {
+			logger.WithError(err).Info("error getting database connection")
+			dbl.mtx.Unlock()
+			continue
+		}
+		_, err = conn.ExecContext(ctx, `SELECT pg_advisory_lock($1)`, dbl.key)
+		if err != nil {
+			logger.WithError(err).Infof("error getting pg_advisory_lock %d", dbl.key)
+			conn.Close()
+			dbl.mtx.Unlock()
+			continue
+		}
+		logger.Debugf("acquired pg_advisory_lock %d", dbl.key)
+		dbl.ctx, dbl.getdb, dbl.conn = ctx, getdb, conn
+		dbl.mtx.Unlock()
+		return
+	}
+}
+
+// Check confirms that the lock is still active (i.e., the session is
+// still alive), and re-acquires if needed. Panics if Lock is not
+// acquired first.
+func (dbl *DBLocker) Check() {
+	dbl.mtx.Lock()
+	err := dbl.conn.PingContext(dbl.ctx)
+	if err == nil {
+		ctxlog.FromContext(dbl.ctx).Debugf("pg_advisory_lock %d connection still alive", dbl.key)
+		dbl.mtx.Unlock()
+		return
+	}
+	ctxlog.FromContext(dbl.ctx).WithError(err).Info("database connection ping failed")
+	dbl.conn.Close()
+	dbl.conn = nil
+	ctx, getdb := dbl.ctx, dbl.getdb
+	dbl.mtx.Unlock()
+	dbl.Lock(ctx, getdb)
+}
+
+func (dbl *DBLocker) Unlock() {
+	dbl.mtx.Lock()
+	defer dbl.mtx.Unlock()
+	if dbl.conn != nil {
+		_, err := dbl.conn.ExecContext(context.Background(), `SELECT pg_advisory_unlock($1)`, dbl.key)
+		if err != nil {
+			ctxlog.FromContext(dbl.ctx).WithError(err).Infof("error releasing pg_advisory_lock %d", dbl.key)
+		} else {
+			ctxlog.FromContext(dbl.ctx).Debugf("released pg_advisory_lock %d", dbl.key)
+		}
+		dbl.conn.Close()
+		dbl.conn = nil
+	}
+}
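A minimal usage sketch of the new dblock package follows, assuming a placeholder Postgres DSN and a made-up one-minute interval; the real caller added in this commit is lib/controller/trash.go, shown further below:

// Minimal usage sketch (not from this commit): a periodic worker that
// holds the cluster-wide TrashSweep advisory lock. The DSN and the
// interval are placeholder assumptions.
package main

import (
	"context"
	"time"

	"git.arvados.org/arvados.git/lib/controller/dblock"
	"github.com/jmoiron/sqlx"
	_ "github.com/lib/pq" // assumed Postgres driver
)

func main() {
	ctx := context.Background()
	getdb := func(context.Context) (*sqlx.DB, error) {
		// A real service would reuse its existing connection pool.
		return sqlx.Open("postgres", "dbname=arvados_example sslmode=disable")
	}
	dblock.TrashSweep.Lock(ctx, getdb) // blocks until the advisory lock is acquired
	defer dblock.TrashSweep.Unlock()
	for range time.Tick(time.Minute) {
		dblock.TrashSweep.Check() // re-acquire if the DB session has died
		// ... perform the periodic task here; a real worker would also
		// stop when ctx is cancelled ...
	}
}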
diff --git a/lib/controller/federation/conn.go b/lib/controller/federation/conn.go
index d1bf473d7..d4155da10 100644
--- a/lib/controller/federation/conn.go
+++ b/lib/controller/federation/conn.go
@@ -525,6 +525,10 @@ func (conn *Conn) SpecimenDelete(ctx context.Context, options arvados.DeleteOpti
 	return conn.chooseBackend(options.UUID).SpecimenDelete(ctx, options)
 }
 
+func (conn *Conn) SysTrashSweep(ctx context.Context, options struct{}) (struct{}, error) {
+	return conn.local.SysTrashSweep(ctx, options)
+}
+
 var userAttrsCachedFromLoginCluster = map[string]bool{
 	"created_at":  true,
 	"email":       true,
diff --git a/lib/controller/federation_test.go b/lib/controller/federation_test.go
index 211c76198..eb398695b 100644
--- a/lib/controller/federation_test.go
+++ b/lib/controller/federation_test.go
@@ -70,7 +70,7 @@ func (s *FederationSuite) SetUpTest(c *check.C) {
 	cluster.Collections.BlobSigningTTL = arvados.Duration(time.Hour * 24 * 14)
 	arvadostest.SetServiceURL(&cluster.Services.RailsAPI, "http://localhost:1/")
 	arvadostest.SetServiceURL(&cluster.Services.Controller, "http://localhost:/")
-	s.testHandler = &Handler{Cluster: cluster}
+	s.testHandler = &Handler{Cluster: cluster, BackgroundContext: ctxlog.Context(context.Background(), s.log)}
 	s.testServer = newServerFromIntegrationTestEnv(c)
 	s.testServer.Server.BaseContext = func(net.Listener) context.Context {
 		return ctxlog.Context(context.Background(), s.log)
diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index b51d90911..965ba040e 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -32,9 +32,11 @@ import (
 )
 
 type Handler struct {
-	Cluster *arvados.Cluster
+	Cluster           *arvados.Cluster
+	BackgroundContext context.Context
 
 	setupOnce      sync.Once
+	federation     *federation.Conn
 	handlerStack   http.Handler
 	proxy          *proxy
 	secureClient   *http.Client
@@ -103,7 +105,8 @@ func (h *Handler) setup() {
 	healthFuncs := make(map[string]health.Func)
 
 	oidcAuthorizer := localdb.OIDCAccessTokenAuthorizer(h.Cluster, h.db)
-	rtr := router.New(federation.New(h.Cluster, &healthFuncs), router.Config{
+	h.federation = federation.New(h.Cluster, &healthFuncs)
+	rtr := router.New(h.federation, router.Config{
 		MaxRequestSize: h.Cluster.API.MaxRequestSize,
 		WrapCalls:      api.ComposeWrappers(ctrlctx.WrapCallsInTransactions(h.db), oidcAuthorizer.WrapCalls),
 	})
@@ -152,6 +155,8 @@ func (h *Handler) setup() {
 	h.proxy = &proxy{
 		Name: "arvados-controller",
 	}
+
+	go h.trashSweepWorker()
 }
 
 var errDBConnection = errors.New("database connection error")
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go
index f854079f9..a456627c0 100644
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -35,7 +35,7 @@ var _ = check.Suite(&HandlerSuite{})
 
 type HandlerSuite struct {
 	cluster *arvados.Cluster
-	handler http.Handler
+	handler *Handler
 	ctx     context.Context
 	cancel  context.CancelFunc
 }
@@ -51,7 +51,7 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
 	s.cluster.TLS.Insecure = true
 	arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
 	arvadostest.SetServiceURL(&s.cluster.Services.Controller, "http://localhost:/")
-	s.handler = newHandler(s.ctx, s.cluster, "", prometheus.NewRegistry())
+	s.handler = newHandler(s.ctx, s.cluster, "", prometheus.NewRegistry()).(*Handler)
 }
 
 func (s *HandlerSuite) TearDownTest(c *check.C) {
@@ -276,7 +276,7 @@ func (s *HandlerSuite) TestLogoutGoogle(c *check.C) {
 
 func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
 	req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
-	user, ok, err := s.handler.(*Handler).validateAPItoken(req, arvadostest.ActiveToken)
+	user, ok, err := s.handler.validateAPItoken(req, arvadostest.ActiveToken)
 	c.Assert(err, check.IsNil)
 	c.Check(ok, check.Equals, true)
 	c.Check(user.Authorization.UUID, check.Equals, arvadostest.ActiveTokenUUID)
@@ -287,7 +287,7 @@ func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
 
 func (s *HandlerSuite) TestValidateV2APIToken(c *check.C) {
 	req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
-	user, ok, err := s.handler.(*Handler).validateAPItoken(req, arvadostest.ActiveTokenV2)
+	user, ok, err := s.handler.validateAPItoken(req, arvadostest.ActiveTokenV2)
 	c.Assert(err, check.IsNil)
 	c.Check(ok, check.Equals, true)
 	c.Check(user.Authorization.UUID, check.Equals, arvadostest.ActiveTokenUUID)
@@ -319,11 +319,11 @@ func (s *HandlerSuite) TestValidateRemoteToken(c *check.C) {
 
 func (s *HandlerSuite) TestCreateAPIToken(c *check.C) {
 	req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
-	auth, err := s.handler.(*Handler).createAPItoken(req, arvadostest.ActiveUserUUID, nil)
+	auth, err := s.handler.createAPItoken(req, arvadostest.ActiveUserUUID, nil)
 	c.Assert(err, check.IsNil)
 	c.Check(auth.Scopes, check.DeepEquals, []string{"all"})
 
-	user, ok, err := s.handler.(*Handler).validateAPItoken(req, auth.TokenV2())
+	user, ok, err := s.handler.validateAPItoken(req, auth.TokenV2())
 	c.Assert(err, check.IsNil)
 	c.Check(ok, check.Equals, true)
 	c.Check(user.Authorization.UUID, check.Equals, auth.UUID)
@@ -430,3 +430,30 @@ func (s *HandlerSuite) TestRedactRailsAPIHostFromErrors(c *check.C) {
 	c.Check(jresp.Errors[0], check.Matches, `.*//railsapi\.internal/arvados/v1/collections/.*: 404 Not Found.*`)
 	c.Check(jresp.Errors[0], check.Not(check.Matches), `(?ms).*127.0.0.1.*`)
 }
+
+func (s *HandlerSuite) TestTrashSweep(c *check.C) {
+	s.cluster.SystemRootToken = arvadostest.SystemRootToken
+	s.cluster.Collections.TrashSweepInterval = arvados.Duration(time.Second / 10)
+	s.handler.CheckHealth()
+	ctx := auth.NewContext(s.ctx, &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
+	coll, err := s.handler.federation.CollectionCreate(ctx, arvados.CreateOptions{Attrs: map[string]interface{}{"name": "test trash sweep"}, EnsureUniqueName: true})
+	c.Assert(err, check.IsNil)
+	defer s.handler.federation.CollectionDelete(ctx, arvados.DeleteOptions{UUID: coll.UUID})
+	db, err := s.handler.db(s.ctx)
+	c.Assert(err, check.IsNil)
+	_, err = db.ExecContext(s.ctx, `update collections set trash_at = $1, delete_at = $2 where uuid = $3`, time.Now().UTC().Add(time.Second/10), time.Now().UTC().Add(time.Hour), coll.UUID)
+	c.Assert(err, check.IsNil)
+	deadline := time.Now().Add(5 * time.Second)
+	for {
+		if time.Now().After(deadline) {
+			c.Log("timed out")
+			c.FailNow()
+		}
+		updated, err := s.handler.federation.CollectionGet(ctx, arvados.GetOptions{UUID: coll.UUID, IncludeTrash: true})
+		c.Assert(err, check.IsNil)
+		if updated.IsTrashed {
+			break
+		}
+		time.Sleep(time.Second / 10)
+	}
+}
diff --git a/lib/controller/rpc/conn.go b/lib/controller/rpc/conn.go
index 25f47bc3b..736ef711e 100644
--- a/lib/controller/rpc/conn.go
+++ b/lib/controller/rpc/conn.go
@@ -572,6 +572,13 @@ func (conn *Conn) SpecimenDelete(ctx context.Context, options arvados.DeleteOpti
 	return resp, err
 }
 
+func (conn *Conn) SysTrashSweep(ctx context.Context, options struct{}) (struct{}, error) {
+	ep := arvados.EndpointSysTrashSweep
+	var resp struct{}
+	err := conn.requestAndDecode(ctx, &resp, ep, nil, options)
+	return resp, err
+}
+
 func (conn *Conn) UserCreate(ctx context.Context, options arvados.CreateOptions) (arvados.User, error) {
 	ep := arvados.EndpointUserCreate
 	var resp arvados.User
diff --git a/lib/controller/server_test.go b/lib/controller/server_test.go
index b2b3365a2..4f3d4a568 100644
--- a/lib/controller/server_test.go
+++ b/lib/controller/server_test.go
@@ -35,11 +35,14 @@ func integrationTestCluster() *arvados.Cluster {
 // provided by the integration-testing environment.
 func newServerFromIntegrationTestEnv(c *check.C) *httpserver.Server {
 	log := ctxlog.TestLogger(c)
-
-	handler := &Handler{Cluster: &arvados.Cluster{
-		ClusterID:  "zzzzz",
-		PostgreSQL: integrationTestCluster().PostgreSQL,
-	}}
+	ctx := ctxlog.Context(context.Background(), log)
+	handler := &Handler{
+		Cluster: &arvados.Cluster{
+			ClusterID:  "zzzzz",
+			PostgreSQL: integrationTestCluster().PostgreSQL,
+		},
+		BackgroundContext: ctx,
+	}
 	handler.Cluster.TLS.Insecure = true
 	handler.Cluster.Collections.BlobSigning = true
 	handler.Cluster.Collections.BlobSigningKey = arvadostest.BlobSigningKey
@@ -49,10 +52,8 @@ func newServerFromIntegrationTestEnv(c *check.C) *httpserver.Server {
 
 	srv := &httpserver.Server{
 		Server: http.Server{
-			BaseContext: func(net.Listener) context.Context {
-				return ctxlog.Context(context.Background(), log)
-			},
-			Handler: httpserver.AddRequestIDs(httpserver.LogRequests(handler)),
+			BaseContext: func(net.Listener) context.Context { return ctx },
+			Handler:     httpserver.AddRequestIDs(httpserver.LogRequests(handler)),
 		},
 		Addr: ":",
 	}
diff --git a/lib/controller/trash.go b/lib/controller/trash.go
new file mode 100644
index 000000000..551b2f92b
--- /dev/null
+++ b/lib/controller/trash.go
@@ -0,0 +1,33 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package controller
+
+import (
+	"time"
+
+	"git.arvados.org/arvados.git/lib/controller/dblock"
+	"git.arvados.org/arvados.git/sdk/go/auth"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+)
+
+func (h *Handler) trashSweepWorker() {
+	sleep := h.Cluster.Collections.TrashSweepInterval.Duration()
+	logger := ctxlog.FromContext(h.BackgroundContext).WithField("worker", "trash sweep")
+	ctx := ctxlog.Context(h.BackgroundContext, logger)
+	if sleep <= 0 {
+		logger.Debugf("Collections.TrashSweepInterval is %v, not running worker", sleep)
+		return
+	}
+	dblock.TrashSweep.Lock(ctx, h.db)
+	defer dblock.TrashSweep.Unlock()
+	for time.Sleep(sleep); ctx.Err() == nil; time.Sleep(sleep) {
+		dblock.TrashSweep.Check()
+		ctx := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{h.Cluster.SystemRootToken}})
+		_, err := h.federation.SysTrashSweep(ctx, struct{}{})
+		if err != nil {
+			logger.WithError(err).Info("trash sweep failed")
+		}
+	}
+}
diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index 0fdc13d19..d4af0e7a8 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -68,6 +68,7 @@ var (
 	EndpointLinkGet                       = APIEndpoint{"GET", "arvados/v1/links/{uuid}", ""}
 	EndpointLinkList                      = APIEndpoint{"GET", "arvados/v1/links", ""}
 	EndpointLinkDelete                    = APIEndpoint{"DELETE", "arvados/v1/links/{uuid}", ""}
+	EndpointSysTrashSweep                 = APIEndpoint{"POST", "sys/trash_sweep", ""}
 	EndpointUserActivate                  = APIEndpoint{"POST", "arvados/v1/users/{uuid}/activate", ""}
 	EndpointUserCreate                    = APIEndpoint{"POST", "arvados/v1/users", "user"}
 	EndpointUserCurrent                   = APIEndpoint{"GET", "arvados/v1/users/current", ""}
@@ -269,6 +270,7 @@ type API interface {
 	SpecimenGet(ctx context.Context, options GetOptions) (Specimen, error)
 	SpecimenList(ctx context.Context, options ListOptions) (SpecimenList, error)
 	SpecimenDelete(ctx context.Context, options DeleteOptions) (Specimen, error)
+	SysTrashSweep(ctx context.Context, options struct{}) (struct{}, error)
 	UserCreate(ctx context.Context, options CreateOptions) (User, error)
 	UserUpdate(ctx context.Context, options UpdateOptions) (User, error)
 	UserMerge(ctx context.Context, options UserMergeOptions) (User, error)
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index 13bb3bf80..5ec828667 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -217,6 +217,8 @@ func (c *Client) DoAndDecode(dst interface{}, req *http.Request) error {
 		return err
 	}
 	switch {
+	case resp.StatusCode == http.StatusNoContent:
+		return nil
 	case resp.StatusCode == http.StatusOK && dst == nil:
 		return nil
 	case resp.StatusCode == http.StatusOK:
diff --git a/sdk/go/arvadostest/api.go b/sdk/go/arvadostest/api.go
index 0af477125..6990a3fdf 100644
--- a/sdk/go/arvadostest/api.go
+++ b/sdk/go/arvadostest/api.go
@@ -209,6 +209,10 @@ func (as *APIStub) SpecimenDelete(ctx context.Context, options arvados.DeleteOpt
 	as.appendCall(ctx, as.SpecimenDelete, options)
 	return arvados.Specimen{}, as.Error
 }
+func (as *APIStub) SysTrashSweep(ctx context.Context, options struct{}) (struct{}, error) {
+	as.appendCall(ctx, as.SysTrashSweep, options)
+	return struct{}{}, as.Error
+}
 func (as *APIStub) UserCreate(ctx context.Context, options arvados.CreateOptions) (arvados.User, error) {
 	as.appendCall(ctx, as.UserCreate, options)
 	return arvados.User{}, as.Error
diff --git a/sdk/go/arvadostest/api_test.go b/sdk/go/arvadostest/api_test.go
new file mode 100644
index 000000000..798d03544
--- /dev/null
+++ b/sdk/go/arvadostest/api_test.go
@@ -0,0 +1,10 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvadostest
+
+import "git.arvados.org/arvados.git/sdk/go/arvados"
+
+// Test that *APIStub implements arvados.API
+var _ arvados.API = &APIStub{}
diff --git a/services/api/app/controllers/arvados/v1/schema_controller.rb b/services/api/app/controllers/arvados/v1/schema_controller.rb
index c1d4b74d6..59ac639ba 100644
--- a/services/api/app/controllers/arvados/v1/schema_controller.rb
+++ b/services/api/app/controllers/arvados/v1/schema_controller.rb
@@ -427,6 +427,27 @@ class Arvados::V1::SchemaController < ApplicationController
         }
       }
 
+      discovery[:resources]['sys'] = {
+        methods: {
+          get: {
+            id: "arvados.sys.trash_sweep",
+            path: "sys/trash_sweep",
+            httpMethod: "POST",
+            description: "apply scheduled trash and delete operations",
+            parameters: {
+            },
+            parameterOrder: [
+            ],
+            response: {
+            },
+            scopes: [
+              "https://api.arvados.org/auth/arvados",
+              "https://api.arvados.org/auth/arvados.readonly"
+            ]
+          },
+        }
+      }
+
       Rails.configuration.API.DisabledAPIs.each do |method, _|
         ctrl, action = method.to_s.split('.', 2)
         discovery[:resources][ctrl][:methods].delete(action.to_sym)
diff --git a/services/api/lib/sweep_trashed_objects.rb b/services/api/app/controllers/sys_controller.rb
similarity index 55%
rename from services/api/lib/sweep_trashed_objects.rb
rename to services/api/app/controllers/sys_controller.rb
index c09896567..a67b124bd 100644
--- a/services/api/lib/sweep_trashed_objects.rb
+++ b/services/api/app/controllers/sys_controller.rb
@@ -2,33 +2,12 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-require 'current_api_client'
+class SysController < ApplicationController
+  skip_before_action :find_object_by_uuid
+  skip_before_action :render_404_if_no_object
+  before_action :admin_required
 
-module SweepTrashedObjects
-  extend CurrentApiClient
-
-  def self.delete_project_and_contents(p_uuid)
-    p = Group.find_by_uuid(p_uuid)
-    if !p || p.group_class != 'project'
-      raise "can't sweep group '#{p_uuid}', it may not exist or not be a project"
-    end
-    # First delete sub projects
-    Group.where({group_class: 'project', owner_uuid: p_uuid}).each do |sub_project|
-      delete_project_and_contents(sub_project.uuid)
-    end
-    # Next, iterate over all tables which have owner_uuid fields, with some
-    # exceptions, and delete records owned by this project
-    skipped_classes = ['Group', 'User']
-    ActiveRecord::Base.descendants.reject(&:abstract_class?).each do |klass|
-      if !skipped_classes.include?(klass.name) && klass.columns.collect(&:name).include?('owner_uuid')
-        klass.where({owner_uuid: p_uuid}).destroy_all
-      end
-    end
-    # Finally delete the project itself
-    p.destroy
-  end
-
-  def self.sweep_now
+  def trash_sweep
     act_as_system_user do
       # Sweep trashed collections
       Collection.
@@ -38,45 +17,43 @@ module SweepTrashedObjects
         where('is_trashed = false and trash_at < statement_timestamp()').
         update_all('is_trashed = true')
 
-      # Sweep trashed projects and their contents
+      # Sweep trashed projects and their contents (as well as role
+      # groups that were trashed before #18340 when that was
+      # disallowed)
       Group.
-        where({group_class: 'project'}).
         where('delete_at is not null and delete_at < statement_timestamp()').each do |project|
           delete_project_and_contents(project.uuid)
       end
       Group.
-        where({group_class: 'project'}).
         where('is_trashed = false and trash_at < statement_timestamp()').
         update_all('is_trashed = true')
 
       # Sweep expired tokens
       ActiveRecord::Base.connection.execute("DELETE from api_client_authorizations where expires_at <= statement_timestamp()")
     end
+    head :no_content
   end
 
-  def self.sweep_if_stale
-    return if Rails.configuration.Collections.TrashSweepInterval <= 0
-    exp = Rails.configuration.Collections.TrashSweepInterval.seconds
-    need = false
-    Rails.cache.fetch('SweepTrashedObjects', expires_in: exp) do
-      need = true
+  protected
+
+  def delete_project_and_contents(p_uuid)
+    p = Group.find_by_uuid(p_uuid)
+    if !p
+      raise "can't sweep group '#{p_uuid}', it may not exist"
+    end
+    # First delete sub projects
+    Group.where({group_class: 'project', owner_uuid: p_uuid}).each do |sub_project|
+      delete_project_and_contents(sub_project.uuid)
     end
-    if need
-      Thread.new do
-        Thread.current.abort_on_exception = false
-        begin
-          sweep_now
-        rescue => e
-          Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
-        ensure
-          # Rails 5.1+ makes test threads share a database connection, so we can't
-          # close a connection shared with other threads.
-          # https://github.com/rails/rails/commit/deba47799ff905f778e0c98a015789a1327d5087
-          if Rails.env != "test"
-            ActiveRecord::Base.connection.close
-          end
-        end
+    # Next, iterate over all tables which have owner_uuid fields, with some
+    # exceptions, and delete records owned by this project
+    skipped_classes = ['Group', 'User']
+    ActiveRecord::Base.descendants.reject(&:abstract_class?).each do |klass|
+      if !skipped_classes.include?(klass.name) && klass.columns.collect(&:name).include?('owner_uuid')
+        klass.where({owner_uuid: p_uuid}).destroy_all
       end
     end
+    # Finally delete the project itself
+    p.destroy
   end
 end
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index a98cde444..b4660dbd3 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 require 'arvados/keep'
-require 'sweep_trashed_objects'
 require 'trashable'
 
 class Collection < ArvadosModel
@@ -616,11 +615,6 @@ class Collection < ArvadosModel
     super - ["manifest_text", "storage_classes_desired", "storage_classes_confirmed", "current_version_uuid"]
   end
 
-  def self.where *args
-    SweepTrashedObjects.sweep_if_stale
-    super
-  end
-
   protected
 
   # Although the defaults for these columns is already set up on the schema,
diff --git a/services/api/config/routes.rb b/services/api/config/routes.rb
index 738426b1d..98f5788d6 100644
--- a/services/api/config/routes.rb
+++ b/services/api/config/routes.rb
@@ -92,6 +92,8 @@ Rails.application.routes.draw do
     end
   end
 
+  post '/sys/trash_sweep', to: 'sys#trash_sweep'
+
   if Rails.env == 'test'
     post '/database/reset', to: 'database#reset'
   end
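
With the route, controller, and Go API entries above in place, the sweep can also be triggered on demand. The following is a hedged sketch using the Go SDK, assuming ARVADOS_API_HOST/ARVADOS_API_TOKEN point at the cluster with an admin token; non-admin tokens receive 403 per the before_action :admin_required above.

// Illustrative only: trigger a sweep on demand through the new
// sys/trash_sweep endpoint.
package main

import (
	"log"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	client := arvados.NewClientFromEnv()
	// POST with no parameters; the endpoint replies 204 No Content,
	// which Client.DoAndDecode now treats as success.
	err := client.RequestAndDecode(nil, "POST", "sys/trash_sweep", nil, nil)
	if err != nil {
		log.Fatalf("trash sweep failed: %s", err)
	}
	log.Print("trash sweep completed")
}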
diff --git a/services/api/test/functional/sys_controller_test.rb b/services/api/test/functional/sys_controller_test.rb
new file mode 100644
index 000000000..e13d70298
--- /dev/null
+++ b/services/api/test/functional/sys_controller_test.rb
@@ -0,0 +1,135 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require 'test_helper'
+
+class SysControllerTest < ActionController::TestCase
+  include CurrentApiClient
+  include DbCurrentTime
+
+  test "trash_sweep - delete expired tokens" do
+    assert_not_empty ApiClientAuthorization.where(uuid: api_client_authorizations(:expired).uuid)
+    authorize_with :admin
+    post :trash_sweep
+    assert_response :success
+    assert_empty ApiClientAuthorization.where(uuid: api_client_authorizations(:expired).uuid)
+  end
+
+  test "trash_sweep - fail with non-admin token" do
+    authorize_with :active
+    post :trash_sweep
+    assert_response 403
+  end
+
+  test "trash_sweep - move collections to trash" do
+    c = collections(:trashed_on_next_sweep)
+    refute_empty Collection.where('uuid=? and is_trashed=false', c.uuid)
+    assert_raises(ActiveRecord::RecordNotUnique) do
+      act_as_user users(:active) do
+        Collection.create!(owner_uuid: c.owner_uuid,
+                           name: c.name)
+      end
+    end
+    authorize_with :admin
+    post :trash_sweep
+    assert_response :success
+    c = Collection.where('uuid=? and is_trashed=true', c.uuid).first
+    assert c
+    act_as_user users(:active) do
+      assert Collection.create!(owner_uuid: c.owner_uuid,
+                                name: c.name)
+    end
+  end
+
+  test "trash_sweep - delete collections" do
+    uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
+    assert_not_empty Collection.where(uuid: uuid)
+    authorize_with :admin
+    post :trash_sweep
+    assert_response :success
+    assert_empty Collection.where(uuid: uuid)
+  end
+
+  test "trash_sweep - delete referring links" do
+    uuid = collections(:trashed_on_next_sweep).uuid
+    act_as_system_user do
+      assert_raises ActiveRecord::RecordInvalid do
+        # Cannot create because :trashed_on_next_sweep is already trashed
+        Link.create!(head_uuid: uuid,
+                     tail_uuid: system_user_uuid,
+                     link_class: 'whatever',
+                     name: 'something')
+      end
+
+      # Bump trash_at to now + 1 minute
+      Collection.where(uuid: uuid).
+        update(trash_at: db_current_time + (1).minute)
+
+      # Not considered trashed now
+      Link.create!(head_uuid: uuid,
+                   tail_uuid: system_user_uuid,
+                   link_class: 'whatever',
+                   name: 'something')
+    end
+    past = db_current_time
+    Collection.where(uuid: uuid).
+      update_all(is_trashed: true, trash_at: past, delete_at: past)
+    assert_not_empty Collection.where(uuid: uuid)
+    authorize_with :admin
+    post :trash_sweep
+    assert_response :success
+    assert_empty Collection.where(uuid: uuid)
+  end
+
+  test "trash_sweep - move projects to trash" do
+    p = groups(:trashed_on_next_sweep)
+    assert_empty Group.where('uuid=? and is_trashed=true', p.uuid)
+    authorize_with :admin
+    post :trash_sweep
+    assert_response :success
+    assert_not_empty Group.where('uuid=? and is_trashed=true', p.uuid)
+  end
+
+  test "trash_sweep - delete projects and their contents" do
+    g_foo = groups(:trashed_project)
+    g_bar = groups(:trashed_subproject)
+    g_baz = groups(:trashed_subproject3)
+    col = collections(:collection_in_trashed_subproject)
+    job = jobs(:job_in_trashed_project)
+    cr = container_requests(:cr_in_trashed_project)
+    # Save how many objects were before the sweep
+    user_nr_was = User.all.length
+    coll_nr_was = Collection.all.length
+    group_nr_was = Group.where('group_class<>?', 'project').length
+    project_nr_was = Group.where(group_class: 'project').length
+    cr_nr_was = ContainerRequest.all.length
+    job_nr_was = Job.all.length
+    assert_not_empty Group.where(uuid: g_foo.uuid)
+    assert_not_empty Group.where(uuid: g_bar.uuid)
+    assert_not_empty Group.where(uuid: g_baz.uuid)
+    assert_not_empty Collection.where(uuid: col.uuid)
+    assert_not_empty Job.where(uuid: job.uuid)
+    assert_not_empty ContainerRequest.where(uuid: cr.uuid)
+
+    authorize_with :admin
+    post :trash_sweep
+    assert_response :success
+
+    assert_empty Group.where(uuid: g_foo.uuid)
+    assert_empty Group.where(uuid: g_bar.uuid)
+    assert_empty Group.where(uuid: g_baz.uuid)
+    assert_empty Collection.where(uuid: col.uuid)
+    assert_empty Job.where(uuid: job.uuid)
+    assert_empty ContainerRequest.where(uuid: cr.uuid)
+    # No unwanted deletions should have happened
+    assert_equal user_nr_was, User.all.length
+    assert_equal coll_nr_was-2,        # collection_in_trashed_subproject
+                 Collection.all.length # & deleted_on_next_sweep collections
+    assert_equal group_nr_was, Group.where('group_class<>?', 'project').length
+    assert_equal project_nr_was-3, Group.where(group_class: 'project').length
+    assert_equal cr_nr_was-1, ContainerRequest.all.length
+    assert_equal job_nr_was-1, Job.all.length
+  end
+
+end
diff --git a/services/api/test/integration/errors_test.rb b/services/api/test/integration/errors_test.rb
index e3224f491..a2a1545ce 100644
--- a/services/api/test/integration/errors_test.rb
+++ b/services/api/test/integration/errors_test.rb
@@ -24,7 +24,7 @@ class ErrorsTest < ActionDispatch::IntegrationTest
       # Generally, new routes should appear under /arvados/v1/. If
       # they appear elsewhere, that might have been caused by default
       # rails generator behavior that we don't want.
-      assert_match(/^\/(|\*a|arvados\/v1\/.*|auth\/.*|login|logout|database\/reset|discovery\/.*|static\/.*|themes\/.*|assets|_health\/.*)(\(\.:format\))?$/,
+      assert_match(/^\/(|\*a|arvados\/v1\/.*|auth\/.*|login|logout|database\/reset|discovery\/.*|static\/.*|sys\/trash_sweep|themes\/.*|assets|_health\/.*)(\(\.:format\))?$/,
                    route.path.spec.to_s,
                    "Unexpected new route: #{route.path.spec}")
     end
diff --git a/services/api/test/unit/api_client_authorization_test.rb b/services/api/test/unit/api_client_authorization_test.rb
index fb90418b8..e043f8914 100644
--- a/services/api/test/unit/api_client_authorization_test.rb
+++ b/services/api/test/unit/api_client_authorization_test.rb
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 require 'test_helper'
-require 'sweep_trashed_objects'
 
 class ApiClientAuthorizationTest < ActiveSupport::TestCase
   include CurrentApiClient
@@ -20,12 +19,6 @@ class ApiClientAuthorizationTest < ActiveSupport::TestCase
     end
   end
 
-  test "delete expired in SweepTrashedObjects" do
-    assert_not_empty ApiClientAuthorization.where(uuid: api_client_authorizations(:expired).uuid)
-    SweepTrashedObjects.sweep_now
-    assert_empty ApiClientAuthorization.where(uuid: api_client_authorizations(:expired).uuid)
-  end
-
   test "accepts SystemRootToken" do
     assert_nil ApiClientAuthorization.validate(token: "xxxSystemRootTokenxxx")
 
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index de0f1d360..e7134a5be 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 require 'test_helper'
-require 'sweep_trashed_objects'
 require 'fix_collection_versions_timestamps'
 
 class CollectionTest < ActiveSupport::TestCase
@@ -1058,60 +1057,6 @@ class CollectionTest < ActiveSupport::TestCase
     assert_includes(coll_uuids, collections(:docker_image).uuid)
   end
 
-  test "move collections to trash in SweepTrashedObjects" do
-    c = collections(:trashed_on_next_sweep)
-    refute_empty Collection.where('uuid=? and is_trashed=false', c.uuid)
-    assert_raises(ActiveRecord::RecordNotUnique) do
-      act_as_user users(:active) do
-        Collection.create!(owner_uuid: c.owner_uuid,
-                           name: c.name)
-      end
-    end
-    SweepTrashedObjects.sweep_now
-    c = Collection.where('uuid=? and is_trashed=true', c.uuid).first
-    assert c
-    act_as_user users(:active) do
-      assert Collection.create!(owner_uuid: c.owner_uuid,
-                                name: c.name)
-    end
-  end
-
-  test "delete collections in SweepTrashedObjects" do
-    uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
-    assert_not_empty Collection.where(uuid: uuid)
-    SweepTrashedObjects.sweep_now
-    assert_empty Collection.where(uuid: uuid)
-  end
-
-  test "delete referring links in SweepTrashedObjects" do
-    uuid = collections(:trashed_on_next_sweep).uuid
-    act_as_system_user do
-      assert_raises ActiveRecord::RecordInvalid do
-        # Cannot create because :trashed_on_next_sweep is already trashed
-        Link.create!(head_uuid: uuid,
-                     tail_uuid: system_user_uuid,
-                     link_class: 'whatever',
-                     name: 'something')
-      end
-
-      # Bump trash_at to now + 1 minute
-      Collection.where(uuid: uuid).
-        update(trash_at: db_current_time + (1).minute)
-
-      # Not considered trashed now
-      Link.create!(head_uuid: uuid,
-                   tail_uuid: system_user_uuid,
-                   link_class: 'whatever',
-                   name: 'something')
-    end
-    past = db_current_time
-    Collection.where(uuid: uuid).
-      update_all(is_trashed: true, trash_at: past, delete_at: past)
-    assert_not_empty Collection.where(uuid: uuid)
-    SweepTrashedObjects.sweep_now
-    assert_empty Collection.where(uuid: uuid)
-  end
-
   test "empty names are exempt from name uniqueness" do
     act_as_user users(:active) do
       c1 = Collection.new(name: nil, manifest_text: '', owner_uuid: groups(:aproject).uuid)
diff --git a/services/api/test/unit/group_test.rb b/services/api/test/unit/group_test.rb
index 017916f48..10932e116 100644
--- a/services/api/test/unit/group_test.rb
+++ b/services/api/test/unit/group_test.rb
@@ -228,50 +228,6 @@ class GroupTest < ActiveSupport::TestCase
     assert User.readable_by(users(:admin)).where(uuid:  u_bar.uuid).any?
   end
 
-  test "move projects to trash in SweepTrashedObjects" do
-    p = groups(:trashed_on_next_sweep)
-    assert_empty Group.where('uuid=? and is_trashed=true', p.uuid)
-    SweepTrashedObjects.sweep_now
-    assert_not_empty Group.where('uuid=? and is_trashed=true', p.uuid)
-  end
-
-  test "delete projects and their contents in SweepTrashedObjects" do
-    g_foo = groups(:trashed_project)
-    g_bar = groups(:trashed_subproject)
-    g_baz = groups(:trashed_subproject3)
-    col = collections(:collection_in_trashed_subproject)
-    job = jobs(:job_in_trashed_project)
-    cr = container_requests(:cr_in_trashed_project)
-    # Save how many objects were before the sweep
-    user_nr_was = User.all.length
-    coll_nr_was = Collection.all.length
-    group_nr_was = Group.where('group_class<>?', 'project').length
-    project_nr_was = Group.where(group_class: 'project').length
-    cr_nr_was = ContainerRequest.all.length
-    job_nr_was = Job.all.length
-    assert_not_empty Group.where(uuid: g_foo.uuid)
-    assert_not_empty Group.where(uuid: g_bar.uuid)
-    assert_not_empty Group.where(uuid: g_baz.uuid)
-    assert_not_empty Collection.where(uuid: col.uuid)
-    assert_not_empty Job.where(uuid: job.uuid)
-    assert_not_empty ContainerRequest.where(uuid: cr.uuid)
-    SweepTrashedObjects.sweep_now
-    assert_empty Group.where(uuid: g_foo.uuid)
-    assert_empty Group.where(uuid: g_bar.uuid)
-    assert_empty Group.where(uuid: g_baz.uuid)
-    assert_empty Collection.where(uuid: col.uuid)
-    assert_empty Job.where(uuid: job.uuid)
-    assert_empty ContainerRequest.where(uuid: cr.uuid)
-    # No unwanted deletions should have happened
-    assert_equal user_nr_was, User.all.length
-    assert_equal coll_nr_was-2,        # collection_in_trashed_subproject
-                 Collection.all.length # & deleted_on_next_sweep collections
-    assert_equal group_nr_was, Group.where('group_class<>?', 'project').length
-    assert_equal project_nr_was-3, Group.where(group_class: 'project').length
-    assert_equal cr_nr_was-1, ContainerRequest.all.length
-    assert_equal job_nr_was-1, Job.all.length
-  end
-
   test "project names must be displayable in a filesystem" do
     set_user_from_auth :active
     ["", "{SOLIDUS}"].each do |subst|

-----------------------------------------------------------------------


hooks/post-receive