[ARVADOS] updated: 58e1942ff008780d88a3e512dd7806c6a3b975a6

Git user git at public.curoverse.com
Fri Feb 19 16:45:35 EST 2016


Summary of changes:
 services/datamanager/collection/collection.go | 34 +++++++++++++++++++++------
 1 file changed, 27 insertions(+), 7 deletions(-)

       via  58e1942ff008780d88a3e512dd7806c6a3b975a6 (commit)
       via  bbd85b230ccd208ef942792109fed1c9161f9429 (commit)
       via  3091c9c336ddca217b14745142f9473a489f42de (commit)
      from  23cf2d8b1245c4594bf13b5904e8b72600645d0f (commit)

The revisions listed above that are new to this repository have
not appeared in any other notification email, so they are listed
in full below.


commit 58e1942ff008780d88a3e512dd7806c6a3b975a6
Merge: 23cf2d8 bbd85b2
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Feb 19 16:45:18 2016 -0500

    Merge branch '8485-datamanager-identical-modifiedat' of https://github.com/wtsi-hgi/arvados closes #8485


commit bbd85b230ccd208ef942792109fed1c9161f9429
Author: Joshua C. Randall <jcrandall at alum.mit.edu>
Date:   Fri Feb 19 12:27:08 2016 +0000

    Makes changes suggested by tomclegg in review of PR #39

diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index 1d41a70..f0a0f3f 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -130,7 +130,7 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 
 	sdkParams := arvadosclient.Dict{
 		"select":  fieldsWanted,
-		"order":   []string{"modified_at ASC"},
+		"order":   []string{"modified_at ASC", "uuid ASC"},
 		"filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
 		"offset": 0}
 
@@ -195,13 +195,17 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 		if err != nil {
 			return
 		}
+		batchCollections := len(collections.Items)
 
-		// Update count of remaining collections
-		remainingCollections = collections.ItemsAvailable - params.BatchSize - sdkParams["offset"].(int)
-		if remainingCollections < 0 {
-			remainingCollections = 0
+		// We must always have at least one collection in the batch
+		if batchCollections < 1 {
+			err = fmt.Errorf("API query returned no collections for %+v", sdkParams)
+			return
 		}
 
+		// Update count of remaining collections
+		remainingCollections = collections.ItemsAvailable - sdkParams["offset"].(int) - batchCollections
+
 		// Process collection and update our date filter.
 		latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
 			collections.Items,
@@ -214,17 +218,18 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 			sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
 			sdkParams["offset"] = 0
 		} else {
-			sdkParams["offset"] = sdkParams["offset"].(int) + params.BatchSize
+			sdkParams["offset"] = sdkParams["offset"].(int) + batchCollections
 		}
 
 		// update counts
 		previousTotalCollections = totalCollections
 		totalCollections = len(results.UUIDToCollection)
 
-		log.Printf("%d collections read, %d new in last batch, "+
+		log.Printf("%d collections read, %d (%d new) in last batch, "+
 			"%d remaining, "+
 			"%s latest modified date, %.0f %d %d avg,max,total manifest size",
 			totalCollections,
+			batchCollections,
 			totalCollections-previousTotalCollections,
 			remainingCollections,
 			sdkParams["filters"].([][]string)[0][2],

commit 3091c9c336ddca217b14745142f9473a489f42de
Author: Joshua C. Randall <jcrandall at alum.mit.edu>
Date:   Thu Feb 18 14:36:53 2016 +0000

    Changes GetCollections loop to more reliably fetch all collections
    
    Modifies the loop termination condition in GetCollections so that
    it continues until there are no more items available according to
    the API server (returned in collections.ItemsAvailable).
    
    Modifies the query code so that it uses an offset to page through
    results in case an entire batch has equal modified_at timestamps.

diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index 1229f29..1d41a70 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -131,7 +131,8 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 	sdkParams := arvadosclient.Dict{
 		"select":  fieldsWanted,
 		"order":   []string{"modified_at ASC"},
-		"filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}}}
+		"filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
+		"offset": 0}
 
 	if params.BatchSize > 0 {
 		sdkParams["limit"] = params.BatchSize
@@ -176,9 +177,10 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 
 	// These values are just for getting the loop to run the first time,
 	// afterwards they'll be set to real values.
-	previousTotalCollections := -1
-	totalCollections := 0
-	for totalCollections > previousTotalCollections {
+	remainingCollections := 1
+	var totalCollections int
+	var previousTotalCollections int
+	for remainingCollections > 0 {
 		// We're still finding new collections
 
 		// Write the heap profile for examining memory usage
@@ -194,6 +196,12 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 			return
 		}
 
+		// Update count of remaining collections
+		remainingCollections = collections.ItemsAvailable - params.BatchSize - sdkParams["offset"].(int)
+		if remainingCollections < 0 {
+			remainingCollections = 0
+		}
+
 		// Process collection and update our date filter.
 		latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
 			collections.Items,
@@ -202,16 +210,23 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 		if err != nil {
 			return results, err
 		}
-		sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+		if sdkParams["filters"].([][]string)[0][2] != latestModificationDate.Format(time.RFC3339) {
+			sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+			sdkParams["offset"] = 0
+		} else {
+			sdkParams["offset"] = sdkParams["offset"].(int) + params.BatchSize
+		}
 
 		// update counts
 		previousTotalCollections = totalCollections
 		totalCollections = len(results.UUIDToCollection)
 
 		log.Printf("%d collections read, %d new in last batch, "+
+			"%d remaining, "+
 			"%s latest modified date, %.0f %d %d avg,max,total manifest size",
 			totalCollections,
 			totalCollections-previousTotalCollections,
+			remainingCollections,
 			sdkParams["filters"].([][]string)[0][2],
 			float32(totalManifestSize)/float32(totalCollections),
 			maxManifestSize, totalManifestSize)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list