[ARVADOS] updated: 6cec281e8653731602ab871cc41ddd21ca8182ab

Git user git at public.curoverse.com
Wed Dec 28 01:21:16 EST 2016


Summary of changes:
 .../arvados/v1/collections_controller.rb           | 25 ++++++++-
 services/api/app/models/collection.rb              | 33 ++++++++----
 services/api/config/application.default.yml        |  1 +
 services/api/config/routes.rb                      |  1 +
 services/api/lib/load_param.rb                     |  3 +-
 services/api/lib/sweep_trashed_collections.rb      |  3 +-
 services/api/test/fixtures/collections.yml         | 30 +++++++++++
 .../arvados/v1/collections_controller_test.rb      | 60 ++++++++++++++++++++++
 services/api/test/integration/select_test.rb       |  7 +++
 services/api/test/unit/collection_test.rb          | 27 ++++++++++
 10 files changed, 176 insertions(+), 14 deletions(-)

  discards  79b33fba9e39eeb4e30ad592d63825e418faed4a (commit)
  discards  c284966280b2c38949339a12dbcf19e8d508ff61 (commit)
       via  6cec281e8653731602ab871cc41ddd21ca8182ab (commit)
       via  4ee5a6efb8b01afddddaa8ce5ed7d0de42a287d6 (commit)
       via  eebb0457f1bb69ec2084cbefcbff8c19e404d556 (commit)
       via  0aa01993bc8533d417dc510b1860a4a583e093b4 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (79b33fba9e39eeb4e30ad592d63825e418faed4a)
            \
             N -- N -- N (6cec281e8653731602ab871cc41ddd21ca8182ab)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 6cec281e8653731602ab871cc41ddd21ca8182ab
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Dec 28 00:52:29 2016 -0500

    Fix use of unquoted user input in regexp.

diff --git a/services/api/lib/load_param.rb b/services/api/lib/load_param.rb
index 3bab33f..dee0f23 100644
--- a/services/api/lib/load_param.rb
+++ b/services/api/lib/load_param.rb
@@ -153,8 +153,9 @@ module LoadParam
       # Any ordering columns must be selected when doing select,
       # otherwise it is an SQL error, so filter out invaliding orderings.
       @orders.select! { |o|
+        col, dir = o.split
         # match select column against order array entry
-        @select.select { |s| /^#{table_name}.#{s}( (asc|desc))?$/.match o }.any?
+        @select.select { |s| col == "#{table_name}.#{s}" }.any?
       }
     end
 
diff --git a/services/api/test/integration/select_test.rb b/services/api/test/integration/select_test.rb
index a7bd545..982e172 100644
--- a/services/api/test/integration/select_test.rb
+++ b/services/api/test/integration/select_test.rb
@@ -36,6 +36,13 @@ class SelectTest < ActionDispatch::IntegrationTest
     end
   end
 
+  test "select with default order" do
+    get "/arvados/v1/links", {format: :json, select: ['uuid']}, auth(:admin)
+    assert_response :success
+    uuids = json_response['items'].collect { |i| i['uuid'] }
+    assert_equal uuids, uuids.sort
+  end
+
   def assert_link_classes_ascend(current_class, prev_class)
     # Databases and Ruby don't always agree about string ordering with
     # punctuation.  If the strings aren't ascending normally, check

commit 4ee5a6efb8b01afddddaa8ce5ed7d0de42a287d6
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Dec 28 00:26:42 2016 -0500

    10538: Add /arvados/v1/collections/{uuid}/trash endpoint.

diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 33b7a2d..6c35897 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -16,7 +16,7 @@ class Arvados::V1::CollectionsController < ApplicationController
   end
 
   def find_objects_for_index
-    if params[:include_trash] || action_name == 'destroy'
+    if params[:include_trash] || ['destroy', 'trash'].include?(action_name)
       @objects = Collection.unscoped.readable_by(*@read_users)
     end
     super
@@ -58,6 +58,13 @@ class Arvados::V1::CollectionsController < ApplicationController
     show
   end
 
+  def trash
+    if !@object.is_trashed
+      @object.update_attributes!(trash_at: db_current_time)
+    end
+    show
+  end
+
   def find_collections(visited, sp, &b)
     case sp
     when ArvadosModel
diff --git a/services/api/config/routes.rb b/services/api/config/routes.rb
index f283904..f89d2c1 100644
--- a/services/api/config/routes.rb
+++ b/services/api/config/routes.rb
@@ -22,6 +22,7 @@ Server::Application.routes.draw do
       resources :collections do
         get 'provenance', on: :member
         get 'used_by', on: :member
+        post 'trash', on: :member
       end
       resources :groups do
         get 'contents', on: :collection
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index cc759a6..bd331c3 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -973,6 +973,21 @@ EOS
     c = Collection.unscoped.find_by_uuid(uuid)
     assert_operator c.trash_at, :<, db_current_time
     assert_operator c.delete_at, :<, db_current_time
-    assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
+  end
+
+  ['zzzzz-4zz18-mto52zx1s7sn3ih',
+   'zzzzz-4zz18-5qa38qghh1j3nvv',
+  ].each do |uuid|
+    test "trash collection #{uuid} via trash action with grace period" do
+      authorize_with :active
+      time_before_trashing = db_current_time
+      post :trash, {
+        id: uuid,
+      }
+      assert_response 200
+      c = Collection.unscoped.find_by_uuid(uuid)
+      assert_operator c.trash_at, :<, db_current_time
+      assert_operator c.delete_at, :>=, time_before_trashing + Rails.configuration.default_trash_lifetime
+    end
   end
 end

commit eebb0457f1bb69ec2084cbefcbff8c19e404d556
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Dec 27 14:01:11 2016 -0500

    10538: Use include_trash in keep-balance to avoid premature block deletes.

diff --git a/sdk/go/arvados/collection.go b/sdk/go/arvados/collection.go
index 2090f56..3584d0d 100644
--- a/sdk/go/arvados/collection.go
+++ b/sdk/go/arvados/collection.go
@@ -20,6 +20,8 @@ type Collection struct {
 	ReplicationConfirmed   *int       `json:"replication_confirmed,omitempty"`
 	ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at,omitempty"`
 	ReplicationDesired     *int       `json:"replication_desired,omitempty"`
+	DeleteAt               *time.Time `json:"delete_at,omitempty"`
+	IsTrash                bool       `json:"is_trash,omitempty"`
 }
 
 // SizedDigests returns the hash+size part of each data block
diff --git a/sdk/go/arvados/resource_list.go b/sdk/go/arvados/resource_list.go
index e9ea268..242fb7e 100644
--- a/sdk/go/arvados/resource_list.go
+++ b/sdk/go/arvados/resource_list.go
@@ -5,11 +5,12 @@ import "encoding/json"
 // ResourceListParams expresses which results are requested in a
 // list/index API.
 type ResourceListParams struct {
-	Select  []string `json:"select,omitempty"`
-	Filters []Filter `json:"filters,omitempty"`
-	Limit   *int     `json:"limit,omitempty"`
-	Offset  int      `json:"offset,omitempty"`
-	Order   string   `json:"order,omitempty"`
+	Select       []string `json:"select,omitempty"`
+	Filters      []Filter `json:"filters,omitempty"`
+	IncludeTrash bool     `json:"include_trash,omitempty"`
+	Limit        *int     `json:"limit,omitempty"`
+	Offset       int      `json:"offset,omitempty"`
+	Order        string   `json:"order,omitempty"`
 }
 
 // A Filter restricts the set of records returned by a list/index API.
diff --git a/services/keep-balance/balance_run_test.go b/services/keep-balance/balance_run_test.go
index 23d74fe..30683b4 100644
--- a/services/keep-balance/balance_run_test.go
+++ b/services/keep-balance/balance_run_test.go
@@ -302,7 +302,7 @@ func (s *runSuite) TestDryRun(c *check.C) {
 		Logger:      s.logger(c),
 	}
 	s.stub.serveCurrentUserAdmin()
-	s.stub.serveFooBarFileCollections()
+	collReqs := s.stub.serveFooBarFileCollections()
 	s.stub.serveFourDiskKeepServices()
 	s.stub.serveKeepstoreIndexFoo4Bar1()
 	trashReqs := s.stub.serveKeepstoreTrash()
@@ -310,6 +310,9 @@ func (s *runSuite) TestDryRun(c *check.C) {
 	var bal Balancer
 	_, err := bal.Run(s.config, opts)
 	c.Check(err, check.IsNil)
+	for _, req := range collReqs.reqs {
+		c.Check(req.Form.Get("include_trash"), check.Equals, "true")
+	}
 	c.Check(trashReqs.Count(), check.Equals, 0)
 	c.Check(pullReqs.Count(), check.Equals, 0)
 	stats := bal.getStatistics()
diff --git a/services/keep-balance/collection.go b/services/keep-balance/collection.go
index f4fc721..f12522f 100644
--- a/services/keep-balance/collection.go
+++ b/services/keep-balance/collection.go
@@ -30,7 +30,9 @@ func EachCollection(c *arvados.Client, pageSize int, f func(arvados.Collection)
 		progress = func(_, _ int) {}
 	}
 
-	expectCount, err := countCollections(c, arvados.ResourceListParams{})
+	expectCount, err := countCollections(c, arvados.ResourceListParams{
+		IncludeTrash: true,
+	})
 	if err != nil {
 		return err
 	}
@@ -41,9 +43,10 @@ func EachCollection(c *arvados.Client, pageSize int, f func(arvados.Collection)
 		limit = 1<<31 - 1
 	}
 	params := arvados.ResourceListParams{
-		Limit:  &limit,
-		Order:  "modified_at, uuid",
-		Select: []string{"uuid", "manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+		Limit:        &limit,
+		Order:        "modified_at, uuid",
+		Select:       []string{"uuid", "manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+		IncludeTrash: true,
 	}
 	var last arvados.Collection
 	var filterTime time.Time
@@ -89,10 +92,13 @@ func EachCollection(c *arvados.Client, pageSize int, f func(arvados.Collection)
 	}
 	progress(callCount, expectCount)
 
-	if checkCount, err := countCollections(c, arvados.ResourceListParams{Filters: []arvados.Filter{{
-		Attr:     "modified_at",
-		Operator: "<=",
-		Operand:  filterTime}}}); err != nil {
+	if checkCount, err := countCollections(c, arvados.ResourceListParams{
+		Filters: []arvados.Filter{{
+			Attr:     "modified_at",
+			Operator: "<=",
+			Operand:  filterTime}},
+		IncludeTrash: true,
+	}); err != nil {
 		return err
 	} else if callCount < checkCount {
 		return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, filterTime, checkCount)

commit 0aa01993bc8533d417dc510b1860a4a583e093b4
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Dec 27 13:57:53 2016 -0500

    10538: Exchange expires_at for new trash_at and delete_at columns.

diff --git a/apps/workbench/app/controllers/projects_controller.rb b/apps/workbench/app/controllers/projects_controller.rb
index 48b2c42..273f9d0 100644
--- a/apps/workbench/app/controllers/projects_controller.rb
+++ b/apps/workbench/app/controllers/projects_controller.rb
@@ -149,10 +149,10 @@ class ProjectsController < ApplicationController
         link.destroy
       end
 
-      # If this object has the 'expires_at' attribute, then simply mark it
-      # expired.
-      if item.attributes.include?("expires_at")
-        item.update_attributes expires_at: Time.now
+      # If this object has the 'trash_at' attribute, then simply mark it
+      # as trash.
+      if item.attributes.include?("trash_at")
+        item.update_attributes trash_at: Time.now
         @removed_uuids << item.uuid
       elsif item.owner_uuid == @object.uuid
         # Object is owned by this project. Remove it from the project by
@@ -161,7 +161,7 @@ class ProjectsController < ApplicationController
           item.update_attributes owner_uuid: current_user.uuid
           @removed_uuids << item.uuid
         rescue ArvadosApiClient::ApiErrorResponseException => e
-          if e.message.include? '_owner_uuid_name_unique'
+          if e.message.include? '_owner_uuid_'
             rename_to = item.name + ' removed from ' +
                         (@object.name ? @object.name : @object.uuid) +
                         ' at ' + Time.now.to_s
diff --git a/apps/workbench/test/controllers/projects_controller_test.rb b/apps/workbench/test/controllers/projects_controller_test.rb
index d0b1e28..1b19f2f 100644
--- a/apps/workbench/test/controllers/projects_controller_test.rb
+++ b/apps/workbench/test/controllers/projects_controller_test.rb
@@ -101,8 +101,9 @@ class ProjectsControllerTest < ActionController::TestCase
   end
 
   test "project admin can remove collections from the project" do
-    # Deleting an object that supports 'expires_at' should make it
-    # completely inaccessible to API queries, not simply moved out of the project.
+    # Deleting an object that supports 'trash_at' should make it
+    # completely inaccessible to API queries, not simply moved out of
+    # the project.
     coll_key = "collection_to_remove_from_subproject"
     coll_uuid = api_fixture("collections")[coll_key]["uuid"]
     delete(:remove_item,
@@ -116,12 +117,12 @@ class ProjectsControllerTest < ActionController::TestCase
 
     use_token :subproject_admin
     assert_raise ArvadosApiClient::NotFoundException do
-      Collection.find(coll_uuid)
+      Collection.find(coll_uuid, cache: false)
     end
   end
 
   test "project admin can remove items from project other than collections" do
-    # An object which does not have an expired_at field (e.g. Specimen)
+    # An object which does not have an trash_at field (e.g. Specimen)
     # should be implicitly moved to the user's Home project when removed.
     specimen_uuid = api_fixture('specimens', 'in_asubproject')['uuid']
     delete(:remove_item,
diff --git a/sdk/go/arvados/collection.go b/sdk/go/arvados/collection.go
index 71f5247..2090f56 100644
--- a/sdk/go/arvados/collection.go
+++ b/sdk/go/arvados/collection.go
@@ -12,7 +12,7 @@ import (
 // Collection is an arvados#collection resource.
 type Collection struct {
 	UUID                   string     `json:"uuid,omitempty"`
-	ExpiresAt              *time.Time `json:"expires_at,omitempty"`
+	TrashAt                *time.Time `json:"trash_at,omitempty"`
 	ManifestText           string     `json:"manifest_text,omitempty"`
 	CreatedAt              *time.Time `json:"created_at,omitempty"`
 	ModifiedAt             *time.Time `json:"modified_at,omitempty"`
diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb
index 017c023..33b7a2d 100644
--- a/services/api/app/controllers/arvados/v1/collections_controller.rb
+++ b/services/api/app/controllers/arvados/v1/collections_controller.rb
@@ -1,6 +1,8 @@
 require "arvados/keep"
 
 class Arvados::V1::CollectionsController < ApplicationController
+  include DbCurrentTime
+
   def self.limit_index_columns_read
     ["manifest_text"]
   end
@@ -13,6 +15,13 @@ class Arvados::V1::CollectionsController < ApplicationController
     super
   end
 
+  def find_objects_for_index
+    if params[:include_trash] || action_name == 'destroy'
+      @objects = Collection.unscoped.readable_by(*@read_users)
+    end
+    super
+  end
+
   def find_object_by_uuid
     if loc = Keep::Locator.parse(params[:id])
       loc.strip_hints!
@@ -23,10 +32,10 @@ class Arvados::V1::CollectionsController < ApplicationController
           manifest_text: c.signed_manifest_text,
         }
       end
+      true
     else
       super
     end
-    true
   end
 
   def show
@@ -37,6 +46,18 @@ class Arvados::V1::CollectionsController < ApplicationController
     end
   end
 
+  def destroy
+    if !@object.is_trashed
+      @object.update_attributes!(trash_at: db_current_time)
+    end
+    earliest_delete = (@object.trash_at +
+                       Rails.configuration.blob_signature_ttl.seconds)
+    if @object.delete_at > earliest_delete
+      @object.update_attributes!(delete_at: earliest_delete)
+    end
+    show
+  end
+
   def find_collections(visited, sp, &b)
     case sp
     when ArvadosModel
diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb
index 910db7e..fd542ca 100644
--- a/services/api/app/models/arvados_model.rb
+++ b/services/api/app/models/arvados_model.rb
@@ -597,7 +597,7 @@ class ArvadosModel < ActiveRecord::Base
     if self == ArvadosModel
       # If called directly as ArvadosModel.find_by_uuid rather than via subclass,
       # delegate to the appropriate subclass based on the given uuid.
-      self.resource_class_for_uuid(uuid).find_by_uuid(uuid)
+      self.resource_class_for_uuid(uuid).unscoped.find_by_uuid(uuid)
     else
       super
     end
diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 901084c..93d4620 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -1,4 +1,5 @@
 require 'arvados/keep'
+require 'sweep_trashed_collections'
 
 class Collection < ArvadosModel
   extend DbCurrentTime
@@ -8,17 +9,21 @@ class Collection < ArvadosModel
 
   serialize :properties, Hash
 
+  before_validation :set_validation_timestamp
   before_validation :default_empty_manifest
   before_validation :check_encoding
   before_validation :check_manifest_validity
   before_validation :check_signatures
   before_validation :strip_signatures_and_update_replication_confirmed
+  before_validation :ensure_trash_at_not_in_past
+  before_validation :sync_trash_state
+  before_validation :default_trash_interval
   validate :ensure_pdh_matches_manifest_text
+  validate :validate_trash_and_delete_timing
   before_save :set_file_names
-  before_save :expires_at_not_in_past
 
-  # Query only undeleted collections by default.
-  default_scope where("expires_at IS NULL or expires_at > statement_timestamp()")
+  # Query only untrashed collections by default.
+  default_scope where("is_trashed = false")
 
   api_accessible :user, extend: :common do |t|
     t.add :name
@@ -29,7 +34,9 @@ class Collection < ArvadosModel
     t.add :replication_desired
     t.add :replication_confirmed
     t.add :replication_confirmed_at
-    t.add :expires_at
+    t.add :delete_at
+    t.add :trash_at
+    t.add :is_trashed
   end
 
   after_initialize do
@@ -45,9 +52,9 @@ class Collection < ArvadosModel
                 # API response, and never let clients select the
                 # manifest_text column.
                 #
-                # We need expires_at to determine the correct
-                # timestamp in signed_manifest_text.
-                'manifest_text' => ['manifest_text', 'expires_at'],
+                # We need trash_at and is_trashed to determine the
+                # correct timestamp in signed_manifest_text.
+                'manifest_text' => ['manifest_text', 'trash_at', 'is_trashed'],
                 )
   end
 
@@ -77,7 +84,7 @@ class Collection < ArvadosModel
       api_token = current_api_client_authorization.andand.api_token
       signing_opts = {
         api_token: api_token,
-        now: db_current_time.to_i,
+        now: @validation_timestamp.to_i,
       }
       self.manifest_text.each_line do |entry|
         entry.split.each do |tok|
@@ -225,11 +232,15 @@ class Collection < ArvadosModel
   end
 
   def signed_manifest_text
-    if has_attribute? :manifest_text
+    if !has_attribute? :manifest_text
+      return nil
+    elsif is_trashed
+      return manifest_text
+    else
       token = current_api_client_authorization.andand.api_token
       exp = [db_current_time.to_i + Rails.configuration.blob_signature_ttl,
-             expires_at].compact.map(&:to_i).min
-      @signed_manifest_text = self.class.sign_manifest manifest_text, token, exp
+             trash_at].compact.map(&:to_i).min
+      self.class.sign_manifest manifest_text, token, exp
     end
   end
 
@@ -367,6 +378,11 @@ class Collection < ArvadosModel
     super - ["manifest_text"]
   end
 
+  def self.where *args
+    SweepTrashedCollections.sweep_if_stale
+    super
+  end
+
   protected
   def portable_manifest_text
     self.class.munge_manifest_locators(manifest_text) do |match|
@@ -403,13 +419,63 @@ class Collection < ArvadosModel
     super
   end
 
-  # If expires_at is being changed to a time in the past, change it to
+  # Use a single timestamp for all validations, even though each
+  # validation runs at a different time.
+  def set_validation_timestamp
+    @validation_timestamp = db_current_time
+  end
+
+  # If trash_at is being changed to a time in the past, change it to
   # now. This allows clients to say "expires {client-current-time}"
   # without failing due to clock skew, while avoiding odd log entries
   # like "expiry date changed to {1 year ago}".
-  def expires_at_not_in_past
-    if expires_at_changed? and expires_at
-      self.expires_at = [db_current_time, expires_at].max
+  def ensure_trash_at_not_in_past
+    if trash_at_changed? && trash_at
+      self.trash_at = [@validation_timestamp, trash_at].max
+    end
+  end
+
+  # Caller can move into/out of trash by setting/clearing is_trashed
+  # -- however, if the caller also changes trash_at, then any changes
+  # to is_trashed are ignored.
+  def sync_trash_state
+    if is_trashed_changed? && !trash_at_changed?
+      if is_trashed
+        self.trash_at = @validation_timestamp
+      else
+        self.trash_at = nil
+        self.delete_at = nil
+      end
+    end
+    self.is_trashed = trash_at && trash_at <= @validation_timestamp || false
+    true
+  end
+
+  # If trash_at is updated without touching delete_at, automatically
+  # update delete_at to a sensible value.
+  def default_trash_interval
+    if trash_at && trash_at_changed? && !delete_at_changed?
+      self.delete_at = trash_at + Rails.configuration.default_trash_lifetime.seconds
+    elsif trash_at.nil? && trash_at_changed? && !delete_at_changed?
+      self.delete_at = nil
+    end
+  end
+
+  def validate_trash_and_delete_timing
+    if trash_at.nil? != delete_at.nil?
+      errors.add :delete_at, "must be nil if and only if trash_at is nil"
     end
+
+    earliest_delete = ([@validation_timestamp, trash_at_was].compact.min +
+                       Rails.configuration.blob_signature_ttl.seconds)
+    if delete_at && delete_at < earliest_delete
+      errors.add :delete_at, "#{delete_at} is too soon: earliest allowed is #{earliest_delete}"
+    end
+
+    if delete_at && delete_at < trash_at
+      errors.add :delete_at, "must not be earlier than trash_at"
+    end
+
+    true
   end
 end
diff --git a/services/api/config/application.default.yml b/services/api/config/application.default.yml
index c8d2d1d..bb1355d 100644
--- a/services/api/config/application.default.yml
+++ b/services/api/config/application.default.yml
@@ -180,9 +180,15 @@ common:
   # The default is 2 weeks.
   blob_signature_ttl: 1209600
 
-  # Default lifetime for ephemeral collections: 2 weeks.
+  # Default lifetime for ephemeral collections: 2 weeks. This must not
+  # be less than blob_signature_ttl.
   default_trash_lifetime: 1209600
 
+  # Interval (seconds) between trash sweeps. During a trash sweep,
+  # collections are marked as trash if their trash_at time has
+  # arrived, and deleted if their delete_at time has arrived.
+  trash_sweep_interval: 60
+
   # Maximum characters of (JSON-encoded) query parameters to include
   # in each request log entry. When params exceed this size, they will
   # be JSON-encoded, truncated to this size, and logged as
@@ -445,3 +451,4 @@ test:
   git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
   git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
   websocket_address: <% if ENV['ARVADOS_TEST_EXPERIMENTAL_WS'] %>"wss://0.0.0.0:<%= ENV['ARVADOS_TEST_WSS_PORT'] %>/websocket"<% else %>false<% end %>
+  trash_sweep_interval: -1
diff --git a/services/api/db/migrate/20161222153434_split_expiry_to_trash_and_delete.rb b/services/api/db/migrate/20161222153434_split_expiry_to_trash_and_delete.rb
new file mode 100644
index 0000000..13e4419
--- /dev/null
+++ b/services/api/db/migrate/20161222153434_split_expiry_to_trash_and_delete.rb
@@ -0,0 +1,42 @@
+class SplitExpiryToTrashAndDelete < ActiveRecord::Migration
+  def up
+    Collection.transaction do
+      add_column(:collections, :trash_at, :datetime)
+      add_index(:collections, :trash_at)
+      add_column(:collections, :is_trashed, :boolean, null: false, default: false)
+      add_index(:collections, :is_trashed)
+      rename_column(:collections, :expires_at, :delete_at)
+      add_index(:collections, :delete_at)
+
+      Collection.reset_column_information
+      Collection.
+        where('delete_at is not null and delete_at <= statement_timestamp()').
+        delete_all
+      Collection.
+        where('delete_at is not null').
+        update_all('is_trashed = true, trash_at = statement_timestamp()')
+      add_index(:collections, [:owner_uuid, :name],
+                unique: true,
+                where: 'is_trashed = false',
+                name: 'index_collections_on_owner_uuid_and_name')
+      remove_index(:collections,
+                   name: 'collection_owner_uuid_name_unique')
+    end
+  end
+
+  def down
+    Collection.transaction do
+      remove_index(:collections, :delete_at)
+      rename_column(:collections, :delete_at, :expires_at)
+      add_index(:collections, [:owner_uuid, :name],
+                unique: true,
+                where: 'expires_at is null',
+                name: 'collection_owner_uuid_name_unique')
+      remove_index(:collections,
+                   name: 'index_collections_on_owner_uuid_and_name')
+      remove_column(:collections, :is_trashed)
+      remove_index(:collections, :trash_at)
+      remove_column(:collections, :trash_at)
+    end
+  end
+end
diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql
index 7ee7ea6..6ef03cf 100644
--- a/services/api/db/structure.sql
+++ b/services/api/db/structure.sql
@@ -169,8 +169,10 @@ CREATE TABLE collections (
     name character varying(255),
     description character varying(524288),
     properties text,
-    expires_at timestamp without time zone,
-    file_names character varying(8192)
+    delete_at timestamp without time zone,
+    file_names character varying(8192),
+    trash_at timestamp without time zone,
+    is_trashed boolean DEFAULT false NOT NULL
 );
 
 
@@ -1496,13 +1498,6 @@ CREATE INDEX authorized_keys_search_index ON authorized_keys USING btree (uuid,
 
 
 --
--- Name: collection_owner_uuid_name_unique; Type: INDEX; Schema: public; Owner: -; Tablespace: 
---
-
-CREATE UNIQUE INDEX collection_owner_uuid_name_unique ON collections USING btree (owner_uuid, name) WHERE (expires_at IS NULL);
-
-
---
 -- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
@@ -1657,6 +1652,20 @@ CREATE INDEX index_collections_on_created_at ON collections USING btree (created
 
 
 --
+-- Name: index_collections_on_delete_at; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE INDEX index_collections_on_delete_at ON collections USING btree (delete_at);
+
+
+--
+-- Name: index_collections_on_is_trashed; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE INDEX index_collections_on_is_trashed ON collections USING btree (is_trashed);
+
+
+--
 -- Name: index_collections_on_modified_at; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
@@ -1671,6 +1680,20 @@ CREATE INDEX index_collections_on_owner_uuid ON collections USING btree (owner_u
 
 
 --
+-- Name: index_collections_on_owner_uuid_and_name; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE UNIQUE INDEX index_collections_on_owner_uuid_and_name ON collections USING btree (owner_uuid, name) WHERE (is_trashed = false);
+
+
+--
+-- Name: index_collections_on_trash_at; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE INDEX index_collections_on_trash_at ON collections USING btree (trash_at);
+
+
+--
 -- Name: index_collections_on_uuid; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
@@ -2706,4 +2729,6 @@ INSERT INTO schema_migrations (version) VALUES ('20161115171221');
 
 INSERT INTO schema_migrations (version) VALUES ('20161115174218');
 
-INSERT INTO schema_migrations (version) VALUES ('20161213172944');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20161213172944');
+
+INSERT INTO schema_migrations (version) VALUES ('20161222153434');
\ No newline at end of file
diff --git a/services/api/lib/sweep_trashed_collections.rb b/services/api/lib/sweep_trashed_collections.rb
new file mode 100644
index 0000000..ab2d27a
--- /dev/null
+++ b/services/api/lib/sweep_trashed_collections.rb
@@ -0,0 +1,34 @@
+require 'current_api_client'
+
+module SweepTrashedCollections
+  extend CurrentApiClient
+
+  def self.sweep_now
+    act_as_system_user do
+      Collection.unscoped.
+        where('delete_at is not null and delete_at < statement_timestamp()').
+        destroy_all
+      Collection.unscoped.
+        where('is_trashed = false and trash_at < statement_timestamp()').
+        update_all('is_trashed = true')
+    end
+  end
+
+  def self.sweep_if_stale
+    return if Rails.configuration.trash_sweep_interval <= 0
+    exp = Rails.configuration.trash_sweep_interval.seconds
+    need = false
+    Rails.cache.fetch('SweepTrashedCollections', expires_in: exp) do
+      need = true
+    end
+    if need
+      Thread.new do
+        begin
+          sweep_now
+        ensure
+          ActiveRecord::Base.connection.close
+        end
+      end
+    end
+  end
+end
diff --git a/services/api/test/fixtures/collections.yml b/services/api/test/fixtures/collections.yml
index 2272b0f..192ed17 100644
--- a/services/api/test/fixtures/collections.yml
+++ b/services/api/test/fixtures/collections.yml
@@ -230,10 +230,42 @@ expired_collection:
   modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
-  expires_at: 2001-01-01T00:00:00Z
+  is_trashed: true
+  trash_at: 2001-01-01T00:00:00Z
+  delete_at: 2038-01-01T00:00:00Z
   manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
   name: expired_collection
 
+trashed_on_next_sweep:
+  uuid: zzzzz-4zz18-4guozfh77ewd2f0
+  portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2016-12-07T22:01:00.123456Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2016-12-27T22:01:30.123456Z
+  updated_at: 2016-12-27T22:01:30.123456Z
+  is_trashed: false
+  trash_at: 2016-12-07T22:01:30.123456Z
+  delete_at: 2112-01-01T00:00:00Z
+  manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
+  name: trashed_on_next_sweep
+
+deleted_on_next_sweep:
+  uuid: zzzzz-4zz18-3u1p5umicfpqszp
+  portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2016-12-07T22:01:00.234567Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2016-12-27T22:01:30.234567Z
+  updated_at: 2016-12-27T22:01:30.234567Z
+  is_trashed: true
+  trash_at: 2016-12-07T22:01:30.234567Z
+  delete_at: 2016-12-27T22:01:30.234567Z
+  manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
+  name: deleted_on_next_sweep
+
 collection_expires_in_future:
   uuid: zzzzz-4zz18-padkqo7yb8d9i3j
   portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
@@ -243,7 +275,8 @@ collection_expires_in_future:
   modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
-  expires_at: 2038-01-01T00:00:00Z
+  trash_at: 2038-01-01T00:00:00Z
+  delete_at: 2038-03-01T00:00:00Z
   manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
   name: collection_expires_in_future
 
diff --git a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb
index b96e22e..cc759a6 100644
--- a/services/api/test/functional/arvados/v1/collections_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb
@@ -1,6 +1,7 @@
 require 'test_helper'
 
 class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
+  include DbCurrentTime
 
   def permit_unsigned_manifests isok=true
     # Set security model for the life of a test.
@@ -930,4 +931,48 @@ EOS
       assert_response 200
     end
   end
+
+  test 'get trashed collection with include_trash' do
+    uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih'
+    authorize_with :active
+    get :show, {
+      id: uuid,
+      include_trash: true,
+    }
+    assert_response 200
+  end
+
+  test 'get trashed collection without include_trash' do
+    uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih'
+    authorize_with :active
+    get :show, {
+      id: uuid,
+    }
+    assert_response 404
+  end
+
+  test 'trash collection using http DELETE verb' do
+    uuid = collections(:collection_owned_by_active).uuid
+    authorize_with :active
+    delete :destroy, {
+      id: uuid,
+    }
+    assert_response 200
+    c = Collection.unscoped.find_by_uuid(uuid)
+    assert_operator c.trash_at, :<, db_current_time
+    assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
+  end
+
+  test 'delete long-trashed collection immediately using http DELETE verb' do
+    uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih'
+    authorize_with :active
+    delete :destroy, {
+      id: uuid,
+    }
+    assert_response 200
+    c = Collection.unscoped.find_by_uuid(uuid)
+    assert_operator c.trash_at, :<, db_current_time
+    assert_operator c.delete_at, :<, db_current_time
+    assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
+  end
 end
diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb
index 1c85a71..d1cb978 100644
--- a/services/api/test/unit/collection_test.rb
+++ b/services/api/test/unit/collection_test.rb
@@ -1,4 +1,5 @@
 require 'test_helper'
+require 'sweep_trashed_collections'
 
 class CollectionTest < ActiveSupport::TestCase
   include DbCurrentTime
@@ -307,11 +308,11 @@ class CollectionTest < ActiveSupport::TestCase
     end
   end
 
-  test 'signature expiry does not exceed expires_at' do
+  test 'signature expiry does not exceed trash_at' do
     act_as_user users(:active) do
       t0 = db_current_time
       c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
-      c.update_attributes! expires_at: (t0 + 1.hours)
+      c.update_attributes! trash_at: (t0 + 1.hours)
       c.reload
       sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
       assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i
@@ -322,10 +323,10 @@ class CollectionTest < ActiveSupport::TestCase
     act_as_user users(:active) do
       c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
                              name: 'foo',
-                             expires_at: db_current_time + 1.years)
+                             trash_at: db_current_time + 1.years)
       sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
       expect_max_sig_exp = db_current_time.to_i + Rails.configuration.blob_signature_ttl
-      assert_operator c.expires_at.to_i, :>, expect_max_sig_exp
+      assert_operator c.trash_at.to_i, :>, expect_max_sig_exp
       assert_operator sig_exp.to_i, :<=, expect_max_sig_exp
     end
   end
@@ -348,7 +349,7 @@ class CollectionTest < ActiveSupport::TestCase
       uuid = c.uuid
 
       # mark collection as expired
-      c.update_attribute 'expires_at', Time.new.strftime("%Y-%m-%d")
+      c.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
       c = Collection.where(uuid: uuid)
       assert_empty c, 'Should not be able to find expired collection'
 
@@ -359,6 +360,108 @@ class CollectionTest < ActiveSupport::TestCase
     end
   end
 
+  test 'trash_at cannot be set too far in the past' do
+    act_as_user users(:active) do
+      t0 = db_current_time
+      c = Collection.create!(manifest_text: '', name: 'foo')
+      c.update_attributes! trash_at: (t0 - 2.weeks)
+      c.reload
+      assert_operator c.trash_at, :>, t0
+    end
+  end
+
+  [['trash-to-delete interval negative',
+    :collection_owned_by_active,
+    {trash_at: Time.now+2.weeks, delete_at: Time.now},
+    {state: :invalid}],
+   ['trash-to-delete interval too short',
+    :collection_owned_by_active,
+    {trash_at: Time.now+3.days, delete_at: Time.now+7.days},
+    {state: :invalid}],
+   ['trash-to-delete interval ok',
+    :collection_owned_by_active,
+    {trash_at: Time.now, delete_at: Time.now+15.days},
+    {state: :trash_now}],
+   ['trash-to-delete interval short, but far enough in future',
+    :collection_owned_by_active,
+    {trash_at: Time.now+13.days, delete_at: Time.now+15.days},
+    {state: :trash_future}],
+   ['trash by setting is_trashed bool',
+    :collection_owned_by_active,
+    {is_trashed: true},
+    {state: :trash_now}],
+   ['trash in future by setting just trash_at',
+    :collection_owned_by_active,
+    {trash_at: Time.now+1.week},
+    {state: :trash_future}],
+   ['trash in future by setting trash_at and delete_at',
+    :collection_owned_by_active,
+    {trash_at: Time.now+1.week, delete_at: Time.now+4.weeks},
+    {state: :trash_future}],
+   ['untrash by clearing is_trashed bool',
+    :expired_collection,
+    {is_trashed: false},
+    {state: :not_trash}],
+  ].each do |test_name, fixture_name, updates, expect|
+    test test_name do
+      act_as_user users(:active) do
+        min_exp = (db_current_time +
+                   Rails.configuration.blob_signature_ttl.seconds)
+        if fixture_name == :expired_collection
+          # Fixture-finder shorthand doesn't find trashed collections
+          # because they're not in the default scope.
+          c = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
+        else
+          c = collections(fixture_name)
+        end
+        updates_ok = c.update_attributes(updates)
+        expect_valid = expect[:state] != :invalid
+        assert_equal updates_ok, expect_valid, c.errors.full_messages.to_s
+        case expect[:state]
+        when :invalid
+          refute c.valid?
+        when :trash_now
+          assert c.is_trashed
+          assert_not_nil c.trash_at
+          assert_operator c.trash_at, :<=, db_current_time
+          assert_not_nil c.delete_at
+          assert_operator c.delete_at, :>=, min_exp
+        when :trash_future
+          refute c.is_trashed
+          assert_not_nil c.trash_at
+          assert_operator c.trash_at, :>, db_current_time
+          assert_not_nil c.delete_at
+          assert_operator c.delete_at, :>=, c.trash_at
+          # Currently this minimum interval is needed to prevent early
+          # garbage collection:
+          assert_operator c.delete_at, :>=, min_exp
+        when :not_trash
+          refute c.is_trashed
+          assert_nil c.trash_at
+          assert_nil c.delete_at
+        else
+          raise "bad expect[:state]==#{expect[:state].inspect} in test case"
+        end
+      end
+    end
+  end
+
+  test 'default trash interval > blob signature ttl' do
+    Rails.configuration.default_trash_lifetime = 86400 * 21 # 3 weeks
+    start = db_current_time
+    act_as_user users(:active) do
+      c = Collection.create!(manifest_text: '', name: 'foo')
+      c.update_attributes!(trash_at: start + 86400.seconds)
+      assert_operator c.delete_at, :>=, start + (86400*22).seconds
+      assert_operator c.delete_at, :<, start + (86400*22 + 30).seconds
+      c.destroy
+
+      c = Collection.create!(manifest_text: '', name: 'foo')
+      c.update_attributes!(is_trashed: true)
+      assert_operator c.delete_at, :>=, start + (86400*21).seconds
+    end
+  end
+
   test "find_all_for_docker_image resolves names that look like hashes" do
     coll_list = Collection.
       find_all_for_docker_image('a' * 64, nil, [users(:active)])
@@ -366,13 +469,29 @@ class CollectionTest < ActiveSupport::TestCase
     assert_includes(coll_uuids, collections(:docker_image).uuid)
   end
 
-  test 'expires_at cannot be set too far in the past' do
+  test "move to trash in SweepTrashedCollections" do
+    uuid = 'zzzzz-4zz18-4guozfh77ewd2f0'
+    c = Collection.where('uuid=? and is_trashed=false', uuid).first
+    assert c
+    assert_raises(ActiveRecord::RecordNotUnique) do
+      act_as_user users(:active) do
+        Collection.create!(owner_uuid: c.owner_uuid,
+                           name: c.name)
+      end
+    end
+    SweepTrashedCollections.sweep_now
+    c = Collection.unscoped.find_by_uuid(uuid)
+    assert_equal true, c.is_trashed
     act_as_user users(:active) do
-      t0 = db_current_time
-      c = Collection.create!(manifest_text: '', name: 'foo')
-      c.update_attributes! expires_at: (t0 - 2.weeks)
-      c.reload
-      assert_operator c.expires_at, :>, t0
+      assert Collection.create!(owner_uuid: c.owner_uuid,
+                                name: c.name)
     end
   end
+
+  test "delete in SweepTrashedCollections" do
+    uuid = 'zzzzz-4zz18-3u1p5umicfpqszp'
+    assert_not_empty Collection.unscoped.where(uuid: uuid)
+    SweepTrashedCollections.sweep_now
+    assert_empty Collection.unscoped.where(uuid: uuid)
+  end
 end
diff --git a/services/arv-web/arv-web.py b/services/arv-web/arv-web.py
index 5a95e27..f440aa6 100755
--- a/services/arv-web/arv-web.py
+++ b/services/arv-web/arv-web.py
@@ -72,7 +72,7 @@ class ArvWeb(object):
                         et = 'add'
                     else:
                         et = 'remove'
-                if ev['properties']['new_attributes']['expires_at'] is not None:
+                if ev['properties']['new_attributes']['trash_at'] is not None:
                     et = 'remove'
 
             self.evqueue.put((self.project, et, ev['object_uuid']))
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index e13033e..971cb3a 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -637,7 +637,7 @@ func (runner *ContainerRunner) CaptureOutput() error {
 	err = runner.ArvClient.Create("collections",
 		arvadosclient.Dict{
 			"collection": arvadosclient.Dict{
-				"expires_at":    time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
+				"trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
 				"name":          "output for " + runner.Container.UUID,
 				"manifest_text": manifestText}},
 		&response)
@@ -708,7 +708,7 @@ func (runner *ContainerRunner) CommitLogs() error {
 	err = runner.ArvClient.Create("collections",
 		arvadosclient.Dict{
 			"collection": arvadosclient.Dict{
-				"expires_at":    time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
+				"trash_at":      time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
 				"name":          "logs for " + runner.Container.UUID,
 				"manifest_text": mt}},
 		&response)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list