[ARVADOS] updated: 2.1.0-2171-g1825273af

Git user git at public.arvados.org
Sat Mar 26 19:04:10 UTC 2022


Summary of changes:
 tools/user-activity/arvados_user_activity/main.py | 178 +++++++++++-----------
 1 file changed, 92 insertions(+), 86 deletions(-)

       via  1825273afecc039a21c1da575a5f156e089b0478 (commit)
      from  c3c1f4261f61bc52dbc8fadad644520797b3f6a6 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 1825273afecc039a21c1da575a5f156e089b0478
Author: Ward Vandewege <ward at curii.com>
Date:   Sat Mar 26 15:01:28 2022 -0400

    18903: Rather than doing one database query for the entire period,
           query the database day by day. The logs table is often huge,
           and a date range query across it can become very expensive.
           Because keyset_list_all repeats that query for every page of
           results it fetches, the number of slow queries is amplified
           dramatically. Querying one day at a time is more efficient.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index f078b8154..deef90d6c 100755
--- a/tools/user-activity/arvados_user_activity/main.py
+++ b/tools/user-activity/arvados_user_activity/main.py
@@ -107,102 +107,108 @@ def main(arguments=None):
                                                        since.isoformat(sep=" ", timespec="minutes"),
                                                        to.isoformat(sep=" ", timespec="minutes")))
 
-    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])
-
     users = {}
     owners = {}
 
-    for e in events:
-        owner = getowner(arv, e["object_owner_uuid"], owners)
-        users.setdefault(owner, [])
-        event_at = ciso8601.parse_datetime(e["event_at"]).astimezone().isoformat(sep=" ", timespec="minutes")
-        # loguuid = e["uuid"]
-        loguuid = ""
-
-        if e["event_type"] == "create" and e["object_uuid"][6:11] == "tpzed":
-            users.setdefault(e["object_uuid"], [])
-            users[e["object_uuid"]].append("%s User account created" % event_at)
-
-        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "tpzed":
-            pass
-
-        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
-            if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
-                users[owner].append("%s Ran container %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
-
-        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "xvhdp":
-            pass
-
-        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Created project %s" %  (event_at, getname(e["properties"]["new_attributes"])))
-
-        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Deleted project %s" % (event_at, getname(e["properties"]["old_attributes"])))
-
-        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Updated project %s" % (event_at, getname(e["properties"]["new_attributes"])))
-
-        elif e["event_type"] in ("create", "update") and e["object_uuid"][6:11] == "gj3su":
-            since_last = None
-            if len(users[owner]) > 0 and users[owner][-1].endswith("activity"):
-                sp = users[owner][-1].split(" ")
-                start = sp[0]+" "+sp[1]
-                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[3]+" "+sp[4])
-                span = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(start)
-
-            if since_last is not None and since_last < datetime.timedelta(minutes=61):
-                users[owner][-1] = "%s to %s (%02d:%02d) Account activity" % (start, event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))
-            else:
-                users[owner].append("%s to %s (0:00) Account activity" % (event_at, event_at))
-
-        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "o0j2j":
-            if e["properties"]["new_attributes"]["link_class"] == "tag":
-                users[owner].append("%s Tagged %s" % (event_at, e["properties"]["new_attributes"]["head_uuid"]))
-            elif e["properties"]["new_attributes"]["link_class"] == "permission":
-                users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
-            else:
-                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
-
-        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
-            if e["properties"]["old_attributes"]["link_class"] == "tag":
-                users[owner].append("%s Untagged %s" % (event_at, e["properties"]["old_attributes"]["head_uuid"]))
-            elif e["properties"]["old_attributes"]["link_class"] == "permission":
-                users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
-            else:
-                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+    # iterate day by day to avoid overly expensive database queries
+    a = since
+    b = since + datetime.timedelta(days=1)
+    while b <= to:
+        events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", a.isoformat()],["created_at", "<", b.isoformat()]])
+        a = b
+        b = a + datetime.timedelta(days=1)
+
+        for e in events:
+            owner = getowner(arv, e["object_owner_uuid"], owners)
+            users.setdefault(owner, [])
+            event_at = ciso8601.parse_datetime(e["event_at"]).astimezone().isoformat(sep=" ", timespec="minutes")
+            # loguuid = e["uuid"]
+            loguuid = ""
+
+            if e["event_type"] == "create" and e["object_uuid"][6:11] == "tpzed":
+                users.setdefault(e["object_uuid"], [])
+                users[e["object_uuid"]].append("%s User account created" % event_at)
 
-        elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
-            if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
+            elif e["event_type"] == "update" and e["object_uuid"][6:11] == "tpzed":
                 pass
-            else:
-                users[owner].append("%s Created collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
 
-        elif e["event_type"] == "update" and e["object_uuid"][6:11] == "4zz18":
-            users[owner].append("%s Updated collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+            elif e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
+                if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
+                    users[owner].append("%s Ran container %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
 
-        elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "4zz18":
-            if e["properties"]["old_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
+            elif e["event_type"] == "update" and e["object_uuid"][6:11] == "xvhdp":
                 pass
-            else:
-                users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))
-
-        elif e["event_type"] == "file_download":
-                users.setdefault(e["object_uuid"], [])
-                users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
-                                                                                       e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                       getCollectionName(arv, e["properties"].get("portable_data_hash")),
-                                                                                       e["properties"].get("collection_uuid"),
-                                                                                       e["properties"].get("portable_data_hash")))
 
-        elif e["event_type"] == "file_upload":
-                users.setdefault(e["object_uuid"], [])
-                users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
-                                                                                    e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                    getCollectionName(arv, e["properties"].get("portable_data_hash")),
-                                                                                    e["properties"].get("collection_uuid")))
+            elif e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
+                users[owner].append("%s Created project %s" %  (event_at, getname(e["properties"]["new_attributes"])))
+
+            elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "j7d0g":
+                users[owner].append("%s Deleted project %s" % (event_at, getname(e["properties"]["old_attributes"])))
+
+            elif e["event_type"] == "update" and e["object_uuid"][6:11] == "j7d0g":
+                users[owner].append("%s Updated project %s" % (event_at, getname(e["properties"]["new_attributes"])))
+
+            elif e["event_type"] in ("create", "update") and e["object_uuid"][6:11] == "gj3su":
+                since_last = None
+                if len(users[owner]) > 0 and users[owner][-1].endswith("activity"):
+                    sp = users[owner][-1].split(" ")
+                    start = sp[0]+" "+sp[1]
+                    since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[3]+" "+sp[4])
+                    span = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(start)
+
+                if since_last is not None and since_last < datetime.timedelta(minutes=61):
+                    users[owner][-1] = "%s to %s (%02d:%02d) Account activity" % (start, event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))
+                else:
+                    users[owner].append("%s to %s (0:00) Account activity" % (event_at, event_at))
+
+            elif e["event_type"] == "create" and e["object_uuid"][6:11] == "o0j2j":
+                if e["properties"]["new_attributes"]["link_class"] == "tag":
+                    users[owner].append("%s Tagged %s" % (event_at, e["properties"]["new_attributes"]["head_uuid"]))
+                elif e["properties"]["new_attributes"]["link_class"] == "permission":
+                    users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
+                else:
+                    users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+
+            elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
+                if e["properties"]["old_attributes"]["link_class"] == "tag":
+                    users[owner].append("%s Untagged %s" % (event_at, e["properties"]["old_attributes"]["head_uuid"]))
+                elif e["properties"]["old_attributes"]["link_class"] == "permission":
+                    users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
+                else:
+                    users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+
+            elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
+                if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
+                    pass
+                else:
+                    users[owner].append("%s Created collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+
+            elif e["event_type"] == "update" and e["object_uuid"][6:11] == "4zz18":
+                users[owner].append("%s Updated collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+
+            elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "4zz18":
+                if e["properties"]["old_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
+                    pass
+                else:
+                    users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))
+
+            elif e["event_type"] == "file_download":
+                    users.setdefault(e["object_uuid"], [])
+                    users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
+                                                                                           e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
+                                                                                           getCollectionName(arv, e["properties"].get("portable_data_hash")),
+                                                                                           e["properties"].get("collection_uuid"),
+                                                                                           e["properties"].get("portable_data_hash")))
+
+            elif e["event_type"] == "file_upload":
+                    users.setdefault(e["object_uuid"], [])
+                    users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
+                                                                                        e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
+                                                                                        getCollectionName(arv, e["properties"].get("portable_data_hash")),
+                                                                                        e["properties"].get("collection_uuid")))
 
-        else:
-            users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+            else:
+                users[owner].append("%s %s %s %s %s" % (event_at, e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
 
     for k,v in users.items():
         if k is None or k.endswith("-tpzed-000000000000000"):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list