[arvados] created: 2.7.0-5392-g65a84ec380
git repository hosting
git at public.arvados.org
Tue Nov 21 13:25:20 UTC 2023
at 65a84ec3801f6b76ccc2e6af07511851b4dc3aba (commit)
commit 65a84ec3801f6b76ccc2e6af07511851b4dc3aba
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 16:31:17 2023 -0500
18800: Add docstring to arvados.__init__
This is mostly intended to help orient people reading the web
documentation.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index 8a7a151ee0..e90f381298 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -1,6 +1,23 @@
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
+"""Arvados Python SDK
+
+This module provides the entire Python SDK for Arvados. The most useful modules
+include:
+
+* arvados.api - After you `import arvados`, you can call `arvados.api.api` as
+ `arvados.api` to construct a client object.
+
+* arvados.collection - The `arvados.collection.Collection` class provides a
+ high-level interface to read and write collections. It coordinates sending
+ data to and from Keep, and synchronizing updates with the collection object.
+
+* arvados.util - Utility functions to use mostly in conjunction with the API
+ client object and the results it returns.
+
+Other submodules provide lower-level functionality.
+"""
import logging as stdliblog
import os
commit 54fe1ce40202881c553c2cb7feffaae68a1da14e
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 16:20:37 2023 -0500
19830: Clean imports in arvados.__init__
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index 21ca72c4bd..8a7a151ee0 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -2,31 +2,12 @@
#
# SPDX-License-Identifier: Apache-2.0
-from __future__ import print_function
-from __future__ import absolute_import
-from future import standard_library
-standard_library.install_aliases()
-from builtins import object
-import bz2
-import fcntl
-import hashlib
-import http.client
-import httplib2
-import json
import logging as stdliblog
import os
-import pprint
-import re
-import string
import sys
-import time
import types
-import zlib
-if sys.version_info >= (3, 0):
- from collections import UserDict
-else:
- from UserDict import UserDict
+from collections import UserDict
from .api import api, api_from_config, http_cache
from .collection import CollectionReader, CollectionWriter, ResumableCollectionWriter
commit b4860265ff2c49e81267577112092c9fd66d94ab
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 15:32:28 2023 -0500
19830: Clean run_test_server imports
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index a5dd88a9c5..a2cc6030a6 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -2,10 +2,6 @@
#
# SPDX-License-Identifier: Apache-2.0
-from __future__ import print_function
-from __future__ import division
-from builtins import str
-from builtins import range
import argparse
import atexit
import errno
@@ -18,7 +14,6 @@ import shlex
import shutil
import signal
import socket
-import string
import subprocess
import sys
import tempfile
@@ -26,10 +21,7 @@ import time
import unittest
import yaml
-try:
- from urllib.parse import urlparse
-except ImportError:
- from urlparse import urlparse
+from urllib.parse import urlparse
MY_DIRNAME = os.path.dirname(os.path.realpath(__file__))
if __name__ == '__main__' and os.path.exists(
commit be5f76b14dbec67840e130284321165d49d3572d
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 13:32:40 2023 -0500
19830: Reorder arvados.util regexps
Keep regexps, then UUID regexps sorted alphabetically, then deprecated
regexps.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 5c4de30907..050c67f68d 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -47,16 +47,25 @@ signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*
"""Regular expression to match any Keep block locator with an access token hint"""
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
"""Regular expression to match any collection portable data hash"""
+manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
+"""Regular expression to match an Arvados collection manifest text"""
+keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
+"""Regular expression to match a file path from a collection identified by portable data hash"""
+keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
+"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""
+
uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
"""Regular expression to match any Arvados object UUID"""
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
"""Regular expression to match any Arvados collection UUID"""
+container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
+"""Regular expression to match any Arvados container UUID"""
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
"""Regular expression to match any Arvados group UUID"""
-user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
-"""Regular expression to match any Arvados user UUID"""
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
"""Regular expression to match any Arvados link UUID"""
+user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
+"""Regular expression to match any Arvados user UUID"""
job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
"""Regular expression to match any Arvados job UUID
@@ -64,14 +73,6 @@ job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
Arvados job resources are deprecated and will be removed in a future
release. Prefer the containers API instead.
"""
-container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
-"""Regular expression to match any Arvados container UUID"""
-manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
-"""Regular expression to match an Arvados collection manifest text"""
-keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
-"""Regular expression to match a file path from a collection identified by portable data hash"""
-keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
-"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""
def _deprecated(version=None, preferred=None):
"""Mark a callable as deprecated in the SDK
commit 8699292036b77373cf60a6a45c013101b90118de
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 13:31:17 2023 -0500
19830: Add docstrings to arvados.util
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index cd9b19ce63..5c4de30907 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -1,7 +1,11 @@
# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
+"""Arvados utilities
+This module provides functions and constants that are useful across a variety
+of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
+"""
import errno
import fcntl
@@ -17,24 +21,57 @@ import warnings
import arvados.errors
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterator,
+ TypeVar,
+ Union,
+)
+
+T = TypeVar('T')
+
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+"""Regular expression to match a hexadecimal string (case-insensitive)"""
CR_UNCOMMITTED = 'Uncommitted'
+"""Constant `state` value for uncommitted container requests"""
CR_COMMITTED = 'Committed'
+"""Constant `state` value for committed container requests"""
CR_FINAL = 'Final'
+"""Constant `state` value for finalized container requests"""
keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*')
+"""Regular expression to match any Keep block locator"""
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+(\+\S+)*\+A\S+(\+\S+)*')
+"""Regular expression to match any Keep block locator with an access token hint"""
portable_data_hash_pattern = re.compile(r'[0-9a-f]{32}\+[0-9]+')
+"""Regular expression to match any collection portable data hash"""
uuid_pattern = re.compile(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}')
+"""Regular expression to match any Arvados object UUID"""
collection_uuid_pattern = re.compile(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}')
+"""Regular expression to match any Arvados collection UUID"""
group_uuid_pattern = re.compile(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}')
+"""Regular expression to match any Arvados group UUID"""
user_uuid_pattern = re.compile(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}')
+"""Regular expression to match any Arvados user UUID"""
link_uuid_pattern = re.compile(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}')
+"""Regular expression to match any Arvados link UUID"""
job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
+"""Regular expression to match any Arvados job UUID
+
+.. WARNING:: Deprecated
+ Arvados job resources are deprecated and will be removed in a future
+ release. Prefer the containers API instead.
+"""
container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
+"""Regular expression to match any Arvados container UUID"""
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+[0-9]+)(\+\S+)*)+( +[0-9]+:[0-9]+:\S+)+$)+', flags=re.MULTILINE)
+"""Regular expression to match an Arvados collection manifest text"""
keep_file_locator_pattern = re.compile(r'([0-9a-f]{32}\+[0-9]+)/(.*)')
+"""Regular expression to match a file path from a collection identified by portable data hash"""
keepuri_pattern = re.compile(r'keep:([0-9a-f]{32}\+[0-9]+)/(.*)')
+"""Regular expression to match a `keep:` URI with a collection identified by portable data hash"""
def _deprecated(version=None, preferred=None):
"""Mark a callable as deprecated in the SDK
@@ -88,15 +125,23 @@ def _deprecated(version=None, preferred=None):
return deprecated_wrapper
return deprecated_decorator
-def is_hex(s, *length_args):
- """is_hex(s[, length[, max_length]]) -> boolean
+def is_hex(s: str, *length_args: int) -> bool:
+ """Indicate whether a string is a hexadecimal number
+
+ This method returns true if all characters in the string are hexadecimal
+ digits. It is case-insensitive.
- Return True if s is a string of hexadecimal digits.
- If one length argument is given, the string must contain exactly
- that number of digits.
- If two length arguments are given, the string must contain a number of
- digits between those two lengths, inclusive.
- Return False otherwise.
+ You can also pass optional length arguments to check that the string has
+ the expected number of digits. If you pass one integer, the string must
+ have that length exactly, otherwise the method returns False. If you
+ pass two integers, the string's length must fall within that minimum and
+ maximum (inclusive), otherwise the method returns False.
+
+ Arguments:
+
+ * s: str --- The string to check
+
+ * length_args: int --- Optional length limit(s) for the string to check
"""
num_length_args = len(length_args)
if num_length_args > 2:
@@ -110,7 +155,45 @@ def is_hex(s, *length_args):
good_len = True
return bool(good_len and HEX_RE.match(s))
-def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
+def keyset_list_all(
+ fn: Callable[..., 'arvados.api_resources.ArvadosAPIRequest'],
+ order_key: str="created_at",
+ num_retries: int=0,
+ ascending: bool=True,
+ **kwargs: Any,
+) -> Iterator[Dict[str, Any]]:
+ """Iterate all Arvados resources from an API list call
+
+ This method takes a method that represents an Arvados API list call, and
+ iterates the objects returned by the API server. It can make multiple API
+ calls to retrieve and iterate all objects available from the API server.
+
+ Arguments:
+
+ * fn: Callable[..., arvados.api_resources.ArvadosAPIRequest] --- A
+ function that wraps an Arvados API method that returns a list of
+ objects. If you have an Arvados API client named `arv`, examples
+ include `arv.collections().list` and `arv.groups().contents`. Note
+ that you should pass the function *without* calling it.
+
+ * order_key: str --- The name of the primary object field that objects
+ should be sorted by. This name is used to build an `order` argument
+ for `fn`. Default `'created_at'`.
+
+ * num_retries: int --- This argument is passed through to
+ `arvados.api_resources.ArvadosAPIRequest.execute` for each API call. See
+ that method's docstring for details. Default 0 (meaning API calls will
+ use the `num_retries` value set when the Arvados API client was
+ constructed).
+
+ * ascending: bool --- Used to build an `order` argument for `fn`. If True,
+ all fields will be sorted in `'asc'` (ascending) order. Otherwise, all
+ fields will be sorted in `'desc'` (descending) order.
+
+ Additional keyword arguments will be passed directly to `fn` for each API
+ call. Note that this function sets `count`, `limit`, and `order` as part of
+ its work.
+ """
pagesize = 1000
kwargs["limit"] = pagesize
kwargs["count"] = 'none'
@@ -177,12 +260,28 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
prev_page_all_same_order_key = False
-def ca_certs_path(fallback=httplib2.CA_CERTS):
- """Return the path of the best available CA certs source.
+def ca_certs_path(fallback: T=httplib2.CA_CERTS) -> Union[str, T]:
+ """Return the path of the best available source of CA certificates
- This function searches for various distribution sources of CA
- certificates, and returns the first it finds. If it doesn't find any,
- it returns the value of `fallback` (httplib2's CA certs by default).
+ This function checks various known paths that provide trusted CA
+ certificates, and returns the first one that exists. It checks:
+
+ * the path in the `SSL_CERT_FILE` environment variable (used by OpenSSL)
+ * `/etc/arvados/ca-certificates.crt`, respected by all Arvados software
+ * `/etc/ssl/certs/ca-certificates.crt`, the default store on Debian-based
+ distributions
+ * `/etc/pki/tls/certs/ca-bundle.crt`, the default store on Red Hat-based
+ distributions
+
+ If none of these paths exist, this function returns the value of `fallback`.
+
+ Arguments:
+
+ * fallback: T --- The value to return if none of the known paths exist.
+ The default value is the certificate store of Mozilla's trusted CAs
+ included with the Python [certifi][] package.
+
+ [certifi]: https://pypi.org/project/certifi/
"""
for ca_certs_path in [
# SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
@@ -199,7 +298,12 @@ def ca_certs_path(fallback=httplib2.CA_CERTS):
return ca_certs_path
return fallback
-def new_request_id():
+def new_request_id() -> str:
+ """Return a random request ID
+
+ This function generates and returns a random string suitable for use as a
+ `X-Request-Id` header value in the Arvados API.
+ """
rid = "req-"
# 2**104 > 36**20 > 2**103
n = random.getrandbits(104)
@@ -212,7 +316,18 @@ def new_request_id():
n = n // 36
return rid
-def get_config_once(svc):
+def get_config_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
+ """Return an Arvados cluster's configuration, with caching
+
+ This function gets and returns the Arvados configuration from the API
+ server. It caches the result on the client object and reuses it on any
+ future calls.
+
+ Arguments:
+
+ * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
+ object to use to retrieve and cache the Arvados cluster configuration.
+ """
if not svc._rootDesc.get('resources').get('configs', False):
# Old API server version, no config export endpoint
return {}
@@ -220,7 +335,22 @@ def get_config_once(svc):
svc._cached_config = svc.configs().get().execute()
return svc._cached_config
-def get_vocabulary_once(svc):
+def get_vocabulary_once(svc: 'arvados.api_resources.ArvadosAPIClient') -> Dict[str, Any]:
+ """Return an Arvados cluster's vocabulary, with caching
+
+ This function gets and returns the Arvados vocabulary from the API
+ server. It caches the result on the client object and reuses it on any
+ future calls.
+
+ .. HINT:: Low-level method
+ This is a relatively low-level wrapper around the Arvados API. Most
+ users will prefer to use `arvados.vocabulary.load_vocabulary`.
+
+ Arguments:
+
+ * svc: arvados.api_resources.ArvadosAPIClient --- The Arvados API client
+ object to use to retrieve and cache the Arvados cluster vocabulary.
+ """
if not svc._rootDesc.get('resources').get('vocabularies', False):
# Old API server version, no vocabulary export endpoint
return {}
@@ -228,14 +358,20 @@ def get_vocabulary_once(svc):
svc._cached_vocabulary = svc.vocabularies().get().execute()
return svc._cached_vocabulary
-def trim_name(collectionname):
- """
- trim_name takes a record name (collection name, project name, etc)
- and trims it to fit the 255 character name limit, with additional
- space for the timestamp added by ensure_unique_name, by removing
- excess characters from the middle and inserting an ellipse
- """
+def trim_name(collectionname: str) -> str:
+ """Limit the length of a name to fit within Arvados API limits
+ This function ensures that a string is short enough to use as an object
+ name in the Arvados API, leaving room for text that may be added by the
+ `ensure_unique_name` argument. If the source name is short enough, it is
+ returned unchanged. Otherwise, this function returns a string with excess
+ characters removed from the middle of the source string and replaced with
+ an ellipsis.
+
+ Arguments:
+
+ * collectionname: str --- The desired source name
+ """
max_name_len = 254 - 28
if len(collectionname) > max_name_len:
commit 680d14bd31a04626ec0149d2e67c774462ccafd3
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 10:57:50 2023 -0500
19830: Update arvados.util._deprecated Markdown style
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 50f44e402c..cd9b19ce63 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -45,12 +45,11 @@ def _deprecated(version=None, preferred=None):
If the following arguments are given, they'll be included in the
notices:
- preferred: str | None
- : The name of an alternative that users should use instead.
+ * preferred: str | None --- The name of an alternative that users should
+ use instead.
- version: str | None
- : The version of Arvados when the callable is scheduled to be
- removed.
+ * version: str | None --- The version of Arvados when the callable is
+ scheduled to be removed.
"""
if version is None:
version = ''
commit 0c0e68a8be50fce1bd537f504a59959499eef02a
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 10:57:10 2023 -0500
19830: Clean arvados.util imports
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 6e2e1c7f4c..50f44e402c 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -2,9 +2,8 @@
#
# SPDX-License-Identifier: Apache-2.0
-from __future__ import division
-from builtins import range
+import errno
import fcntl
import functools
import hashlib
@@ -13,7 +12,6 @@ import os
import random
import re
import subprocess
-import errno
import sys
import warnings
commit 2d0f3de7dd0090abc88348e14bd4181747e9399e
Author: Brett Smith <brett.smith at curii.com>
Date: Mon Nov 20 10:53:02 2023 -0500
19830: Move deprecated functions to the bottom of arvados.util
This makes the web documentation easier to navigate.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 88adc8879b..6e2e1c7f4c 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -91,6 +91,177 @@ def _deprecated(version=None, preferred=None):
return deprecated_wrapper
return deprecated_decorator
+def is_hex(s, *length_args):
+ """is_hex(s[, length[, max_length]]) -> boolean
+
+ Return True if s is a string of hexadecimal digits.
+ If one length argument is given, the string must contain exactly
+ that number of digits.
+ If two length arguments are given, the string must contain a number of
+ digits between those two lengths, inclusive.
+ Return False otherwise.
+ """
+ num_length_args = len(length_args)
+ if num_length_args > 2:
+ raise arvados.errors.ArgumentError(
+ "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
+ elif num_length_args == 2:
+ good_len = (length_args[0] <= len(s) <= length_args[1])
+ elif num_length_args == 1:
+ good_len = (len(s) == length_args[0])
+ else:
+ good_len = True
+ return bool(good_len and HEX_RE.match(s))
+
+def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
+ pagesize = 1000
+ kwargs["limit"] = pagesize
+ kwargs["count"] = 'none'
+ asc = "asc" if ascending else "desc"
+ kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
+ other_filters = kwargs.get("filters", [])
+
+ try:
+ select = set(kwargs['select'])
+ except KeyError:
+ pass
+ else:
+ select.add(order_key)
+ select.add('uuid')
+ kwargs['select'] = list(select)
+
+ nextpage = []
+ tot = 0
+ expect_full_page = True
+ seen_prevpage = set()
+ seen_thispage = set()
+ lastitem = None
+ prev_page_all_same_order_key = False
+
+ while True:
+ kwargs["filters"] = nextpage+other_filters
+ items = fn(**kwargs).execute(num_retries=num_retries)
+
+ if len(items["items"]) == 0:
+ if prev_page_all_same_order_key:
+ nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
+ prev_page_all_same_order_key = False
+ continue
+ else:
+ return
+
+ seen_prevpage = seen_thispage
+ seen_thispage = set()
+
+ for i in items["items"]:
+ # In cases where there's more than one record with the
+ # same order key, the result could include records we
+ # already saw in the last page. Skip them.
+ if i["uuid"] in seen_prevpage:
+ continue
+ seen_thispage.add(i["uuid"])
+ yield i
+
+ firstitem = items["items"][0]
+ lastitem = items["items"][-1]
+
+ if firstitem[order_key] == lastitem[order_key]:
+ # Got a page where every item has the same order key.
+ # Switch to using uuid for paging.
+ nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
+ prev_page_all_same_order_key = True
+ else:
+ # Start from the last order key seen, but skip the last
+ # known uuid to avoid retrieving the same row twice. If
+ # there are multiple rows with the same order key it is
+ # still likely we'll end up retrieving duplicate rows.
+ # That's handled by tracking the "seen" rows for each page
+ # so they can be skipped if they show up on the next page.
+ nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
+ prev_page_all_same_order_key = False
+
+def ca_certs_path(fallback=httplib2.CA_CERTS):
+ """Return the path of the best available CA certs source.
+
+ This function searches for various distribution sources of CA
+ certificates, and returns the first it finds. If it doesn't find any,
+ it returns the value of `fallback` (httplib2's CA certs by default).
+ """
+ for ca_certs_path in [
+ # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
+ # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
+ os.environ.get('SSL_CERT_FILE'),
+ # Arvados specific:
+ '/etc/arvados/ca-certificates.crt',
+ # Debian:
+ '/etc/ssl/certs/ca-certificates.crt',
+ # Red Hat:
+ '/etc/pki/tls/certs/ca-bundle.crt',
+ ]:
+ if ca_certs_path and os.path.exists(ca_certs_path):
+ return ca_certs_path
+ return fallback
+
+def new_request_id():
+ rid = "req-"
+ # 2**104 > 36**20 > 2**103
+ n = random.getrandbits(104)
+ for _ in range(20):
+ c = n % 36
+ if c < 10:
+ rid += chr(c+ord('0'))
+ else:
+ rid += chr(c+ord('a')-10)
+ n = n // 36
+ return rid
+
+def get_config_once(svc):
+ if not svc._rootDesc.get('resources').get('configs', False):
+ # Old API server version, no config export endpoint
+ return {}
+ if not hasattr(svc, '_cached_config'):
+ svc._cached_config = svc.configs().get().execute()
+ return svc._cached_config
+
+def get_vocabulary_once(svc):
+ if not svc._rootDesc.get('resources').get('vocabularies', False):
+ # Old API server version, no vocabulary export endpoint
+ return {}
+ if not hasattr(svc, '_cached_vocabulary'):
+ svc._cached_vocabulary = svc.vocabularies().get().execute()
+ return svc._cached_vocabulary
+
+def trim_name(collectionname):
+ """
+ trim_name takes a record name (collection name, project name, etc)
+ and trims it to fit the 255 character name limit, with additional
+ space for the timestamp added by ensure_unique_name, by removing
+ excess characters from the middle and inserting an ellipse
+ """
+
+ max_name_len = 254 - 28
+
+ if len(collectionname) > max_name_len:
+ over = len(collectionname) - max_name_len
+ split = int(max_name_len/2)
+ collectionname = collectionname[0:split] + "…" + collectionname[split+over:]
+
+ return collectionname
+
+@_deprecated('3.0', 'arvados.util.keyset_list_all')
+def list_all(fn, num_retries=0, **kwargs):
+ # Default limit to (effectively) api server's MAX_LIMIT
+ kwargs.setdefault('limit', sys.maxsize)
+ items = []
+ offset = 0
+ items_available = sys.maxsize
+ while len(items) < items_available:
+ c = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
+ items += c['items']
+ items_available = c['items_available']
+ offset = c['offset'] + len(c['items'])
+ return items
+
@_deprecated('3.0')
def clear_tmpdir(path=None):
"""
@@ -428,174 +599,3 @@ def listdir_recursive(dirname, base=None, max_depth=None):
else:
allfiles += [ent_base]
return allfiles
-
-def is_hex(s, *length_args):
- """is_hex(s[, length[, max_length]]) -> boolean
-
- Return True if s is a string of hexadecimal digits.
- If one length argument is given, the string must contain exactly
- that number of digits.
- If two length arguments are given, the string must contain a number of
- digits between those two lengths, inclusive.
- Return False otherwise.
- """
- num_length_args = len(length_args)
- if num_length_args > 2:
- raise arvados.errors.ArgumentError(
- "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
- elif num_length_args == 2:
- good_len = (length_args[0] <= len(s) <= length_args[1])
- elif num_length_args == 1:
- good_len = (len(s) == length_args[0])
- else:
- good_len = True
- return bool(good_len and HEX_RE.match(s))
-
-@_deprecated('3.0', 'arvados.util.keyset_list_all')
-def list_all(fn, num_retries=0, **kwargs):
- # Default limit to (effectively) api server's MAX_LIMIT
- kwargs.setdefault('limit', sys.maxsize)
- items = []
- offset = 0
- items_available = sys.maxsize
- while len(items) < items_available:
- c = fn(offset=offset, **kwargs).execute(num_retries=num_retries)
- items += c['items']
- items_available = c['items_available']
- offset = c['offset'] + len(c['items'])
- return items
-
-def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, **kwargs):
- pagesize = 1000
- kwargs["limit"] = pagesize
- kwargs["count"] = 'none'
- asc = "asc" if ascending else "desc"
- kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
- other_filters = kwargs.get("filters", [])
-
- try:
- select = set(kwargs['select'])
- except KeyError:
- pass
- else:
- select.add(order_key)
- select.add('uuid')
- kwargs['select'] = list(select)
-
- nextpage = []
- tot = 0
- expect_full_page = True
- seen_prevpage = set()
- seen_thispage = set()
- lastitem = None
- prev_page_all_same_order_key = False
-
- while True:
- kwargs["filters"] = nextpage+other_filters
- items = fn(**kwargs).execute(num_retries=num_retries)
-
- if len(items["items"]) == 0:
- if prev_page_all_same_order_key:
- nextpage = [[order_key, ">" if ascending else "<", lastitem[order_key]]]
- prev_page_all_same_order_key = False
- continue
- else:
- return
-
- seen_prevpage = seen_thispage
- seen_thispage = set()
-
- for i in items["items"]:
- # In cases where there's more than one record with the
- # same order key, the result could include records we
- # already saw in the last page. Skip them.
- if i["uuid"] in seen_prevpage:
- continue
- seen_thispage.add(i["uuid"])
- yield i
-
- firstitem = items["items"][0]
- lastitem = items["items"][-1]
-
- if firstitem[order_key] == lastitem[order_key]:
- # Got a page where every item has the same order key.
- # Switch to using uuid for paging.
- nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
- prev_page_all_same_order_key = True
- else:
- # Start from the last order key seen, but skip the last
- # known uuid to avoid retrieving the same row twice. If
- # there are multiple rows with the same order key it is
- # still likely we'll end up retrieving duplicate rows.
- # That's handled by tracking the "seen" rows for each page
- # so they can be skipped if they show up on the next page.
- nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
- prev_page_all_same_order_key = False
-
-def ca_certs_path(fallback=httplib2.CA_CERTS):
- """Return the path of the best available CA certs source.
-
- This function searches for various distribution sources of CA
- certificates, and returns the first it finds. If it doesn't find any,
- it returns the value of `fallback` (httplib2's CA certs by default).
- """
- for ca_certs_path in [
- # SSL_CERT_FILE and SSL_CERT_DIR are openssl overrides - note
- # that httplib2 itself also supports HTTPLIB2_CA_CERTS.
- os.environ.get('SSL_CERT_FILE'),
- # Arvados specific:
- '/etc/arvados/ca-certificates.crt',
- # Debian:
- '/etc/ssl/certs/ca-certificates.crt',
- # Red Hat:
- '/etc/pki/tls/certs/ca-bundle.crt',
- ]:
- if ca_certs_path and os.path.exists(ca_certs_path):
- return ca_certs_path
- return fallback
-
-def new_request_id():
- rid = "req-"
- # 2**104 > 36**20 > 2**103
- n = random.getrandbits(104)
- for _ in range(20):
- c = n % 36
- if c < 10:
- rid += chr(c+ord('0'))
- else:
- rid += chr(c+ord('a')-10)
- n = n // 36
- return rid
-
-def get_config_once(svc):
- if not svc._rootDesc.get('resources').get('configs', False):
- # Old API server version, no config export endpoint
- return {}
- if not hasattr(svc, '_cached_config'):
- svc._cached_config = svc.configs().get().execute()
- return svc._cached_config
-
-def get_vocabulary_once(svc):
- if not svc._rootDesc.get('resources').get('vocabularies', False):
- # Old API server version, no vocabulary export endpoint
- return {}
- if not hasattr(svc, '_cached_vocabulary'):
- svc._cached_vocabulary = svc.vocabularies().get().execute()
- return svc._cached_vocabulary
-
-def trim_name(collectionname):
- """
- trim_name takes a record name (collection name, project name, etc)
- and trims it to fit the 255 character name limit, with additional
- space for the timestamp added by ensure_unique_name, by removing
- excess characters from the middle and inserting an ellipse
- """
-
- max_name_len = 254 - 28
-
- if len(collectionname) > max_name_len:
- over = len(collectionname) - max_name_len
- split = int(max_name_len/2)
- collectionname = collectionname[0:split] + "…" + collectionname[split+over:]
-
- return collectionname
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list