[ARVADOS] updated: 58e1942ff008780d88a3e512dd7806c6a3b975a6
Git user
git at public.curoverse.com
Fri Feb 19 16:45:35 EST 2016
Summary of changes:
services/datamanager/collection/collection.go | 34 +++++++++++++++++++++------
1 file changed, 27 insertions(+), 7 deletions(-)
via 58e1942ff008780d88a3e512dd7806c6a3b975a6 (commit)
via bbd85b230ccd208ef942792109fed1c9161f9429 (commit)
via 3091c9c336ddca217b14745142f9473a489f42de (commit)
from 23cf2d8b1245c4594bf13b5904e8b72600645d0f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 58e1942ff008780d88a3e512dd7806c6a3b975a6
Merge: 23cf2d8 bbd85b2
Author: Tom Clegg <tom at curoverse.com>
Date: Fri Feb 19 16:45:18 2016 -0500
Merge branch '8485-datamanager-identical-modifiedat' of https://github.com/wtsi-hgi/arvados closes #8485
commit bbd85b230ccd208ef942792109fed1c9161f9429
Author: Joshua C. Randall <jcrandall at alum.mit.edu>
Date: Fri Feb 19 12:27:08 2016 +0000
Makes changes suggested by tomclegg in review of PR #39
diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index 1d41a70..f0a0f3f 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -130,7 +130,7 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
sdkParams := arvadosclient.Dict{
"select": fieldsWanted,
- "order": []string{"modified_at ASC"},
+ "order": []string{"modified_at ASC", "uuid ASC"},
"filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
"offset": 0}
@@ -195,13 +195,17 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
if err != nil {
return
}
+ batchCollections := len(collections.Items)
- // Update count of remaining collections
- remainingCollections = collections.ItemsAvailable - params.BatchSize - sdkParams["offset"].(int)
- if remainingCollections < 0 {
- remainingCollections = 0
+ // We must always have at least one collection in the batch
+ if batchCollections < 1 {
+ err = fmt.Errorf("API query returned no collections for %+v", sdkParams)
+ return
}
+ // Update count of remaining collections
+ remainingCollections = collections.ItemsAvailable - sdkParams["offset"].(int) - batchCollections
+
// Process collection and update our date filter.
latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
collections.Items,
@@ -214,17 +218,18 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
sdkParams["offset"] = 0
} else {
- sdkParams["offset"] = sdkParams["offset"].(int) + params.BatchSize
+ sdkParams["offset"] = sdkParams["offset"].(int) + batchCollections
}
// update counts
previousTotalCollections = totalCollections
totalCollections = len(results.UUIDToCollection)
- log.Printf("%d collections read, %d new in last batch, "+
+ log.Printf("%d collections read, %d (%d new) in last batch, "+
"%d remaining, "+
"%s latest modified date, %.0f %d %d avg,max,total manifest size",
totalCollections,
+ batchCollections,
totalCollections-previousTotalCollections,
remainingCollections,
sdkParams["filters"].([][]string)[0][2],
commit 3091c9c336ddca217b14745142f9473a489f42de
Author: Joshua C. Randall <jcrandall at alum.mit.edu>
Date: Thu Feb 18 14:36:53 2016 +0000
Changes GetCollections loop to more reliably fetch all collections
Modifies the loop termination condition in GetCollections so that
it continues until there are no more items available according to
the API server (returned in collections.ItemsAvailable).
Modifies the query code so that it uses an offset to page through
results in case an entire batch has equal modified_at timestamps.
diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
index 1229f29..1d41a70 100644
--- a/services/datamanager/collection/collection.go
+++ b/services/datamanager/collection/collection.go
@@ -131,7 +131,8 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
sdkParams := arvadosclient.Dict{
"select": fieldsWanted,
"order": []string{"modified_at ASC"},
- "filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}}}
+ "filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
+ "offset": 0}
if params.BatchSize > 0 {
sdkParams["limit"] = params.BatchSize
@@ -176,9 +177,10 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
// These values are just for getting the loop to run the first time,
// afterwards they'll be set to real values.
- previousTotalCollections := -1
- totalCollections := 0
- for totalCollections > previousTotalCollections {
+ remainingCollections := 1
+ var totalCollections int
+ var previousTotalCollections int
+ for remainingCollections > 0 {
// We're still finding new collections
// Write the heap profile for examining memory usage
@@ -194,6 +196,12 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
return
}
+ // Update count of remaining collections
+ remainingCollections = collections.ItemsAvailable - params.BatchSize - sdkParams["offset"].(int)
+ if remainingCollections < 0 {
+ remainingCollections = 0
+ }
+
// Process collection and update our date filter.
latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
collections.Items,
@@ -202,16 +210,23 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
if err != nil {
return results, err
}
- sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+ if sdkParams["filters"].([][]string)[0][2] != latestModificationDate.Format(time.RFC3339) {
+ sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+ sdkParams["offset"] = 0
+ } else {
+ sdkParams["offset"] = sdkParams["offset"].(int) + params.BatchSize
+ }
// update counts
previousTotalCollections = totalCollections
totalCollections = len(results.UUIDToCollection)
log.Printf("%d collections read, %d new in last batch, "+
+ "%d remaining, "+
"%s latest modified date, %.0f %d %d avg,max,total manifest size",
totalCollections,
totalCollections-previousTotalCollections,
+ remainingCollections,
sdkParams["filters"].([][]string)[0][2],
float32(totalManifestSize)/float32(totalCollections),
maxManifestSize, totalManifestSize)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list