[ARVADOS] updated: 2.1.0-2193-g1cb86079b

Git user git at public.arvados.org
Fri Apr 1 13:51:22 UTC 2022


Summary of changes:
 services/keep-web/handler.go                      |  8 ++
 tools/user-activity/arvados_user_activity/main.py | 95 ++++++++++++++++++-----
 2 files changed, 84 insertions(+), 19 deletions(-)

       via  1cb86079bf4976723c5bc5ac196f77c6d5ad5d75 (commit)
       via  dac71bdee15c42c74ab679495d0987d0235d3688 (commit)
       via  90163e501ad6eade4987303d1811b37114c5deae (commit)
       via  5e62be798776db7228fb316e2f7b1edabfde1dfb (commit)
       via  79c8245519881c01507fa092ba682c105f4a2358 (commit)
       via  d3d6d61e083cec30f4581c3eb9e942beb67c6712 (commit)
       via  70840372b4d7b07a896795007d9fc6aac77d4ee9 (commit)
      from  5b7ef5c97664a55ba1fac24110a7ed98e8894ac3 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 1cb86079bf4976723c5bc5ac196f77c6d5ad5d75
Author: Ward Vandewege <ward at curii.com>
Date:   Fri Apr 1 09:05:23 2022 -0400

    18903: make getCollectionName smarter: look up the name by uuid if it is
           available, and fall back to looking it up by pdh if not. In the latter
           case, always return the name of the oldest collection with that pdh.
    
           Clarify that start/end parameters are in UTC since all timestamps
           in the database are stored in UTC.
    
           Fix error when printing help text.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index f078b8154..904c76a60 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -13,27 +13,47 @@ import ciso8601
 
 def parse_arguments(arguments):
     arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format')
-    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format')
+    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format (UTC)')
+    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format (UTC)')
     arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
     args = arg_parser.parse_args(arguments)
 
     if args.days and (args.start or args.end):
-        p.print_help()
+        arg_parser.print_help()
         print("Error: either specify --days or both --start and --end")
         exit(1)
 
     if not args.days and (not args.start or not args.end):
-        p.print_help()
-        print("Error: either specify --days or both --start and --end")
+        arg_parser.print_help()
+        print("\nError: either specify --days or both --start and --end")
         exit(1)
 
     if (args.start and not args.end) or (args.end and not args.start):
-        p.print_help()
-        print("Error: no start or end date found, either specify --days or both --start and --end")
+        arg_parser.print_help()
+        print("\nError: no start or end date found, either specify --days or both --start and --end")
         exit(1)
 
-    return args
+    if args.days:
+        to = datetime.datetime.utcnow()
+        since = to - datetime.timedelta(days=args.days)
+
+    if args.start:
+        try:
+            since = datetime.datetime.strptime(args.start,"%Y-%m-%d")
+        except:
+            arg_parser.print_help()
+            print("\nError: start date must be in YYYY-MM-DD format")
+            exit(1)
+
+    if args.end:
+        try:
+            to = datetime.datetime.strptime(args.end,"%Y-%m-%d")
+        except:
+            arg_parser.print_help()
+            print("\nError: end date must be in YYYY-MM-DD format")
+            exit(1)
+
+    return args, since, to
 
 def getowner(arv, uuid, owners):
     if uuid is None:
@@ -64,13 +84,26 @@ def getuserinfo(arv, uuid):
                                                        uuid, prof)
 
 collectionNameCache = {}
-def getCollectionName(arv, pdh):
-    if pdh not in collectionNameCache:
-        u = arv.collections().list(filters=[["portable_data_hash","=",pdh]]).execute().get("items")
+def getCollectionName(arv, uuid, pdh):
+    lookupField = uuid
+    filters = [["uuid","=",uuid]]
+    cached = uuid in collectionNameCache
+    # look up by uuid if it is available, fall back to look up by pdh
+    if len(uuid) != 27:
+        # Look up by pdh. Note that this can be misleading; the download could
+        # have happened from a collection with the same pdh but different name.
+        # We arbitrarily pick the oldest collection with the pdh to lookup the
+        # name, if the uuid for the request is not known.
+        lookupField = pdh
+        filters = [["portable_data_hash","=",pdh]]
+        cached = pdh in collectionNameCache
+
+    if not cached:
+        u = arv.collections().list(filters=filters,order="created_at",limit=1).execute().get("items")
         if len(u) < 1:
             return "(deleted)"
-        collectionNameCache[pdh] = u[0]["name"]
-    return collectionNameCache[pdh]
+        collectionNameCache[lookupField] = u[0]["name"]
+    return collectionNameCache[lookupField]
 
 def getname(u):
     return "\"%s\" (%s)" % (u["name"], u["uuid"])
@@ -79,30 +112,10 @@ def main(arguments=None):
     if arguments is None:
         arguments = sys.argv[1:]
 
-    args = parse_arguments(arguments)
+    args, since, to = parse_arguments(arguments)
 
     arv = arvados.api()
 
-    if args.days:
-        to = datetime.datetime.utcnow()
-        since = to - datetime.timedelta(days=args.days)
-
-    if args.start:
-        try:
-            since = datetime.datetime.strptime(args.start,"%Y-%m-%d")
-        except:
-            p.print_help()
-            print("Error: start date must be in YYYY-MM-DD format")
-            exit(1)
-
-    if args.end:
-        try:
-            to = datetime.datetime.strptime(args.end,"%Y-%m-%d")
-        except:
-            p.print_help()
-            print("Error: end date must be in YYYY-MM-DD format")
-            exit(1)
-
     print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"],
                                                        since.isoformat(sep=" ", timespec="minutes"),
                                                        to.isoformat(sep=" ", timespec="minutes")))
@@ -190,7 +203,7 @@ def main(arguments=None):
                 users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
                                                                                        e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                       getCollectionName(arv, e["properties"].get("portable_data_hash")),
+                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
                                                                                        e["properties"].get("collection_uuid"),
                                                                                        e["properties"].get("portable_data_hash")))
 
@@ -198,7 +211,7 @@ def main(arguments=None):
                 users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
                                                                                     e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                    getCollectionName(arv, e["properties"].get("portable_data_hash")),
+                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
                                                                                     e["properties"].get("collection_uuid")))
 
         else:

commit dac71bdee15c42c74ab679495d0987d0235d3688
Author: Ward Vandewege <ward at curii.com>
Date:   Fri Mar 25 20:30:09 2022 -0400

    18903: add a date range option to the user activity report.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index 3fcba5748..f078b8154 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -13,8 +13,26 @@ import ciso8601
 
 def parse_arguments(arguments):
     arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument('--days', type=int, required=True)
+    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format')
+    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format')
+    arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
     args = arg_parser.parse_args(arguments)
+
+    if args.days and (args.start or args.end):
+        p.print_help()
+        print("Error: either specify --days or both --start and --end")
+        exit(1)
+
+    if not args.days and (not args.start or not args.end):
+        p.print_help()
+        print("Error: either specify --days or both --start and --end")
+        exit(1)
+
+    if (args.start and not args.end) or (args.end and not args.start):
+        p.print_help()
+        print("Error: no start or end date found, either specify --days or both --start and --end")
+        exit(1)
+
     return args
 
 def getowner(arv, uuid, owners):
@@ -65,13 +83,31 @@ def main(arguments=None):
 
     arv = arvados.api()
 
-    since = datetime.datetime.utcnow() - datetime.timedelta(days=args.days)
+    if args.days:
+        to = datetime.datetime.utcnow()
+        since = to - datetime.timedelta(days=args.days)
+
+    if args.start:
+        try:
+            since = datetime.datetime.strptime(args.start,"%Y-%m-%d")
+        except:
+            p.print_help()
+            print("Error: start date must be in YYYY-MM-DD format")
+            exit(1)
+
+    if args.end:
+        try:
+            to = datetime.datetime.strptime(args.end,"%Y-%m-%d")
+        except:
+            p.print_help()
+            print("Error: end date must be in YYYY-MM-DD format")
+            exit(1)
 
     print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"],
-                                                       (datetime.datetime.now() - datetime.timedelta(days=args.days)).isoformat(sep=" ", timespec="minutes"),
-                                                       datetime.datetime.now().isoformat(sep=" ", timespec="minutes")))
+                                                       since.isoformat(sep=" ", timespec="minutes"),
+                                                       to.isoformat(sep=" ", timespec="minutes")))
 
-    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()]])
+    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])
 
     users = {}
     owners = {}

commit 90163e501ad6eade4987303d1811b37114c5deae
Author: Ward Vandewege <ward at curii.com>
Date:   Fri Mar 25 17:33:43 2022 -0400

    18903: handle deleted users properly.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index 2f38db3be..3fcba5748 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -33,7 +33,11 @@ def getowner(arv, uuid, owners):
     return getowner(arv, owners[uuid], owners)
 
 def getuserinfo(arv, uuid):
-    u = arv.users().get(uuid=uuid).execute()
+    try:
+        u = arv.users().get(uuid=uuid).execute()
+    except:
+        return "deleted user (%susers/%s)" % (arv.config()["Services"]["Workbench1"]["ExternalURL"],
+                                                       uuid)
     prof = "\n".join("  %s: \"%s\"" % (k, v) for k, v in u["prefs"].get("profile", {}).items() if v)
     if prof:
         prof = "\n"+prof+"\n"

commit 5e62be798776db7228fb316e2f7b1edabfde1dfb
Author: Ward Vandewege <ward at curii.com>
Date:   Wed Mar 23 14:08:09 2022 -0400

    18903: keep-web: when logging a file upload or download, if the
           collection UUID is not known, blank the field rather than
           populating it with the PDH (which is also present in a separate
           field).
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 97ec95e3a..ef61b0687 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -913,6 +913,14 @@ func (h *handler) logUploadOrDownload(
 			WithField("collection_file_path", filepath)
 		props["collection_uuid"] = collection.UUID
 		props["collection_file_path"] = filepath
+		// h.determineCollection populates the collection_uuid prop with the PDH, if
+		// this collection is being accessed via PDH. In that case, blank the
+		// collection_uuid field so that consumers of the log entries can rely on it
+		// being a UUID, or blank. The PDH remains available via the
+		// portable_data_hash property.
+		if props["collection_uuid"] == collection.PortableDataHash {
+			props["collection_uuid"] = ""
+		}
 	}
 	if r.Method == "PUT" || r.Method == "POST" {
 		log.Info("File upload")

commit 79c8245519881c01507fa092ba682c105f4a2358
Author: Ward Vandewege <ward at curii.com>
Date:   Wed Mar 23 13:04:48 2022 -0400

    18903: formatting fix for output: always prepend the event timestamp
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index 15383cd90..2f38db3be 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -121,7 +121,7 @@ def main(arguments=None):
             elif e["properties"]["new_attributes"]["link_class"] == "permission":
                 users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
             else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
         elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
             if e["properties"]["old_attributes"]["link_class"] == "tag":
@@ -129,7 +129,7 @@ def main(arguments=None):
             elif e["properties"]["old_attributes"]["link_class"] == "permission":
                 users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
             else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
         elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
             if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
@@ -162,7 +162,7 @@ def main(arguments=None):
                                                                                     e["properties"].get("collection_uuid")))
 
         else:
-            users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+            users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
     for k,v in users.items():
         if k is None or k.endswith("-tpzed-000000000000000"):

commit d3d6d61e083cec30f4581c3eb9e942beb67c6712
Author: Ward Vandewege <ward at curii.com>
Date:   Wed Mar 23 12:07:51 2022 -0400

    18903: the user activity script needs to look up collections by PDH, not
           UUID (that field is not always populated with a UUID).
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index 841685f01..15383cd90 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -42,11 +42,13 @@ def getuserinfo(arv, uuid):
                                                        uuid, prof)
 
 collectionNameCache = {}
-def getCollectionName(arv, uuid):
-    if uuid not in collectionNameCache:
-        u = arv.collections().get(uuid=uuid).execute()
-        collectionNameCache[uuid] = u["name"]
-    return collectionNameCache[uuid]
+def getCollectionName(arv, pdh):
+    if pdh not in collectionNameCache:
+        u = arv.collections().list(filters=[["portable_data_hash","=",pdh]]).execute().get("items")
+        if len(u) < 1:
+            return "(deleted)"
+        collectionNameCache[pdh] = u[0]["name"]
+    return collectionNameCache[pdh]
 
 def getname(u):
     return "\"%s\" (%s)" % (u["name"], u["uuid"])
@@ -148,7 +150,7 @@ def main(arguments=None):
                 users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
                                                                                        e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid")),
+                                                                                       getCollectionName(arv, e["properties"].get("portable_data_hash")),
                                                                                        e["properties"].get("collection_uuid"),
                                                                                        e["properties"].get("portable_data_hash")))
 
@@ -156,7 +158,7 @@ def main(arguments=None):
                 users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
                                                                                     e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid")),
+                                                                                    getCollectionName(arv, e["properties"].get("portable_data_hash")),
                                                                                     e["properties"].get("collection_uuid")))
 
         else:

commit 70840372b4d7b07a896795007d9fc6aac77d4ee9
Author: Ward Vandewege <ward at curii.com>
Date:   Wed Mar 23 11:00:08 2022 -0400

    18903: fix uninitialized user object in the user activity script.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index 997da57e0..841685f01 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -145,6 +145,7 @@ def main(arguments=None):
                 users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))
 
         elif e["event_type"] == "file_download":
+                users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
                                                                                        e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
                                                                                        getCollectionName(arv, e["properties"].get("collection_uuid")),
@@ -152,6 +153,7 @@ def main(arguments=None):
                                                                                        e["properties"].get("portable_data_hash")))
 
         elif e["event_type"] == "file_upload":
+                users.setdefault(e["object_uuid"], [])
                 users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
                                                                                     e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
                                                                                     getCollectionName(arv, e["properties"].get("collection_uuid")),

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list