[arvados] created: 2.6.0-548-g312bc8bb5b
git repository hosting
git at public.arvados.org
Mon Aug 28 19:54:42 UTC 2023
at 312bc8bb5bb01f8dc3fbf98a98a5de402172ba88 (commit)
commit 312bc8bb5bb01f8dc3fbf98a98a5de402172ba88
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 22:13:39 2023 -0400
20839: Mark deprecated PySDK functions as such
This is in preparation to remove these from the SDK in Arvados 3.0.
These changes exacerbated the circular import between the top-level
arvados and util, which had knock-off effects on other import
statements, causing them to see the world differently (like whether
`arvados.api` is a module or function). Most of the import-related
shuffling is meant to mitigate all that.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/__init__.py b/sdk/python/arvados/__init__.py
index 39fdb11003..21ca72c4bd 100644
--- a/sdk/python/arvados/__init__.py
+++ b/sdk/python/arvados/__init__.py
@@ -49,6 +49,7 @@ logger.addHandler(log_handler)
logger.setLevel(stdliblog.DEBUG if config.get('ARVADOS_DEBUG')
else stdliblog.WARNING)
+ at util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def task_set_output(self, s, num_retries=5):
for tries_left in RetryLoop(num_retries=num_retries, backoff_start=0):
try:
@@ -66,6 +67,7 @@ def task_set_output(self, s, num_retries=5):
raise
_current_task = None
+ at util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def current_task(num_retries=5):
global _current_task
if _current_task:
@@ -86,6 +88,7 @@ def current_task(num_retries=5):
raise
_current_job = None
+ at util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def current_job(num_retries=5):
global _current_job
if _current_job:
@@ -104,21 +107,26 @@ def current_job(num_retries=5):
else:
raise
+ at util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def getjobparam(*args):
return current_job()['script_parameters'].get(*args)
+ at util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def get_job_param_mount(*args):
return os.path.join(os.environ['TASK_KEEPMOUNT'], current_job()['script_parameters'].get(*args))
+ at util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def get_task_param_mount(*args):
return os.path.join(os.environ['TASK_KEEPMOUNT'], current_task()['parameters'].get(*args))
class JobTask(object):
+ @util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def __init__(self, parameters=dict(), runtime_constraints=dict()):
print("init jobtask %s %s" % (parameters, runtime_constraints))
class job_setup(object):
@staticmethod
+ @util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def one_task_per_input_file(if_sequence=0, and_end_task=True, input_as_path=False, api_client=None):
if if_sequence != current_task()['sequence']:
return
@@ -151,6 +159,7 @@ class job_setup(object):
exit(0)
@staticmethod
+ @util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def one_task_per_input_stream(if_sequence=0, and_end_task=True):
if if_sequence != current_task()['sequence']:
return
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index 5e49d883a2..bfb43be5eb 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -99,6 +99,7 @@ class _WriterFile(_FileLikeObjectBase):
class CollectionWriter(CollectionBase):
"""Deprecated, use Collection instead."""
+ @arvados.util._deprecated('3.0', 'arvados.collection.Collection')
def __init__(self, api_client=None, num_retries=0, replication=None):
"""Instantiate a CollectionWriter.
@@ -412,6 +413,7 @@ class ResumableCollectionWriter(CollectionWriter):
'_data_buffer', '_dependencies', '_finished_streams',
'_queued_dirents', '_queued_trees']
+ @arvados.util._deprecated('3.0', 'arvados.collection.Collection')
def __init__(self, api_client=None, **kwargs):
self._dependencies = {}
super(ResumableCollectionWriter, self).__init__(api_client, **kwargs)
@@ -1956,11 +1958,14 @@ class CollectionReader(Collection):
self._streams = [normalize_stream(s, streams[s])
for s in sorted(streams)]
+
+ @arvados.util._deprecated('3.0', 'Collection iteration')
@_populate_streams
def all_streams(self):
return [StreamReader(s, self._my_keep(), num_retries=self.num_retries)
for s in self._streams]
+ @arvados.util._deprecated('3.0', 'Collection iteration')
@_populate_streams
def all_files(self):
for s in self.all_streams():
diff --git a/sdk/python/arvados/commands/migrate19.py b/sdk/python/arvados/commands/migrate19.py
index 3ce47b2066..2fef419ee8 100644
--- a/sdk/python/arvados/commands/migrate19.py
+++ b/sdk/python/arvados/commands/migrate19.py
@@ -18,6 +18,7 @@ import arvados
import arvados.commands.keepdocker
from arvados._version import __version__
from arvados.collection import CollectionReader
+from .. import util
logger = logging.getLogger('arvados.migrate-docker19')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
@@ -29,6 +30,7 @@ _migration_link_name = 'migrate_1.9_1.10'
class MigrationFailed(Exception):
pass
+ at util._deprecated('3.0')
def main(arguments=None):
"""Docker image format migration tool for Arvados.
diff --git a/sdk/python/arvados/crunch.py b/sdk/python/arvados/crunch.py
index 70b8b44033..6dd144c43b 100644
--- a/sdk/python/arvados/crunch.py
+++ b/sdk/python/arvados/crunch.py
@@ -5,6 +5,7 @@
from builtins import object
import json
import os
+from . import util
class TaskOutputDir(object):
"""Keep-backed directory for staging outputs of Crunch tasks.
@@ -21,6 +22,7 @@ class TaskOutputDir(object):
f.write('42')
arvados.current_task().set_output(out.manifest_text())
"""
+ @util._deprecated('3.0', 'arvados-cwl-runner or the containers API')
def __init__(self):
self.path = os.environ['TASK_KEEPMOUNT_TMP']
diff --git a/sdk/python/arvados/safeapi.py b/sdk/python/arvados/safeapi.py
index e9dde19625..3ecc72a950 100644
--- a/sdk/python/arvados/safeapi.py
+++ b/sdk/python/arvados/safeapi.py
@@ -10,13 +10,15 @@ Arvados API client.
from __future__ import absolute_import
from builtins import object
+import sys
import threading
-from . import api
from . import config
from . import keep
from . import util
+api = sys.modules['arvados.api']
+
class ThreadSafeApiCache(object):
"""Thread-safe wrapper for an Arvados API client
diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index eadfbbec07..37cd5d7db8 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -20,9 +20,11 @@ from arvados.retry import retry_method
from arvados.keep import *
from . import config
from . import errors
+from . import util
from ._normalize_stream import normalize_stream
class StreamReader(object):
+ @util._deprecated('3.0', 'arvados.collection.Collecttion')
def __init__(self, tokens, keep=None, debug=False, _empty=False,
num_retries=10):
self._stream_name = None
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 2f145e7343..16f2999ca8 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -6,6 +6,7 @@ from __future__ import division
from builtins import range
import fcntl
+import functools
import hashlib
import httplib2
import os
@@ -14,9 +15,9 @@ import re
import subprocess
import errno
import sys
+import warnings
-import arvados
-from arvados.collection import CollectionReader
+import arvados.errors
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
CR_UNCOMMITTED = 'Uncommitted'
@@ -35,13 +36,67 @@ job_uuid_pattern = re.compile(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}')
container_uuid_pattern = re.compile(r'[a-z0-9]{5}-dz642-[a-z0-9]{15}')
manifest_pattern = re.compile(r'((\S+)( +[a-f0-9]{32}(\+\d+)(\+\S+)*)+( +\d+:\d+:\S+)+$)+', flags=re.MULTILINE)
+def _deprecated(version=None, preferred=None):
+ """Mark a callable as deprecated in the SDK
+
+ This will wrap the callable to emit as a DeprecationWarning
+ and add a deprecation notice to its docstring.
+
+ If the following arguments are given, they'll be included in the
+ notices:
+
+ preferred: str | None
+ : The name of an alternative that users should use instead.
+
+ version: str | None
+ : The version of Arvados when the callable is scheduled to be
+ removed.
+ """
+ if version is None:
+ version = ''
+ else:
+ version = f' and scheduled to be removed in Arvados {version}'
+ if preferred is None:
+ preferred = ''
+ else:
+ preferred = f' Prefer {preferred} instead.'
+ def deprecated_decorator(func):
+ fullname = f'{func.__module__}.{func.__qualname__}'
+ parent, _, name = fullname.rpartition('.')
+ if name == '__init__':
+ fullname = parent
+ warning_msg = f'{fullname} is deprecated{version}.{preferred}'
+ @functools.wraps(func)
+ def deprecated_wrapper(*args, **kwargs):
+ warnings.warn(warning_msg, DeprecationWarning, 2)
+ return func(*args, **kwargs)
+ # Get func's docstring without any trailing newline or empty lines.
+ func_doc = re.sub(r'\n\s*$', '', func.__doc__ or '')
+ match = re.search(r'\n([ \t]+)\S', func_doc)
+ indent = '' if match is None else match.group(1)
+ # Make the deprecation notice the second "paragraph" of the
+ # docstring if possible. Otherwise append it.
+ docstring, count = re.subn(
+ rf'\n[ \t]*\n{indent}',
+ f'\n\n{indent}DEPRECATED: {warning_msg}\n\n{indent}',
+ func_doc,
+ count=1,
+ )
+ if not count:
+ docstring = f'{func_doc}\n\n{indent}DEPRECATED: {warning_msg}'.lstrip()
+ deprecated_wrapper.__doc__ = docstring
+ return deprecated_wrapper
+ return deprecated_decorator
+
+ at _deprecated('3.0')
def clear_tmpdir(path=None):
"""
Ensure the given directory (or TASK_TMPDIR if none given)
exists and is empty.
"""
+ from arvados import current_task
if path is None:
- path = arvados.current_task().tmpdir
+ path = current_task().tmpdir
if os.path.exists(path):
p = subprocess.Popen(['rm', '-rf', path])
stdout, stderr = p.communicate(None)
@@ -49,6 +104,7 @@ def clear_tmpdir(path=None):
raise Exception('rm -rf %s: %s' % (path, stderr))
os.mkdir(path)
+ at _deprecated('3.0', 'subprocess.run')
def run_command(execargs, **kwargs):
kwargs.setdefault('stdin', subprocess.PIPE)
kwargs.setdefault('stdout', subprocess.PIPE)
@@ -63,9 +119,11 @@ def run_command(execargs, **kwargs):
(execargs, p.returncode, stderrdata))
return stdoutdata, stderrdata
+ at _deprecated('3.0')
def git_checkout(url, version, path):
+ from arvados import current_job
if not re.search('^/', path):
- path = os.path.join(arvados.current_job().tmpdir, path)
+ path = os.path.join(current_job().tmpdir, path)
if not os.path.exists(path):
run_command(["git", "clone", url, path],
cwd=os.path.dirname(path))
@@ -73,6 +131,7 @@ def git_checkout(url, version, path):
cwd=path)
return path
+ at _deprecated('3.0')
def tar_extractor(path, decompress_flag):
return subprocess.Popen(["tar",
"-C", path,
@@ -82,6 +141,7 @@ def tar_extractor(path, decompress_flag):
stdin=subprocess.PIPE, stderr=sys.stderr,
shell=False, close_fds=True)
+ at _deprecated('3.0', 'arvados.collection.Collection.open and the tarfile module')
def tarball_extract(tarball, path):
"""Retrieve a tarball from Keep and extract it to a local
directory. Return the absolute path where the tarball was
@@ -92,8 +152,10 @@ def tarball_extract(tarball, path):
tarball -- collection locator
path -- where to extract the tarball: absolute, or relative to job tmp
"""
+ from arvados import current_job
+ from arvados.collection import CollectionReader
if not re.search('^/', path):
- path = os.path.join(arvados.current_job().tmpdir, path)
+ path = os.path.join(current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
@@ -144,6 +206,7 @@ def tarball_extract(tarball, path):
return os.path.join(path, tld_extracts[0])
return path
+ at _deprecated('3.0', 'arvados.collection.Collection.open and the zipfile module')
def zipball_extract(zipball, path):
"""Retrieve a zip archive from Keep and extract it to a local
directory. Return the absolute path where the archive was
@@ -154,8 +217,10 @@ def zipball_extract(zipball, path):
zipball -- collection locator
path -- where to extract the archive: absolute, or relative to job tmp
"""
+ from arvados import current_job
+ from arvados.collection import CollectionReader
if not re.search('^/', path):
- path = os.path.join(arvados.current_job().tmpdir, path)
+ path = os.path.join(current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
@@ -210,6 +275,7 @@ def zipball_extract(zipball, path):
return os.path.join(path, tld_extracts[0])
return path
+ at _deprecated('3.0', 'arvados.collection.Collection')
def collection_extract(collection, path, files=[], decompress=True):
"""Retrieve a collection from Keep and extract it to a local
directory. Return the absolute path where the collection was
@@ -218,13 +284,15 @@ def collection_extract(collection, path, files=[], decompress=True):
collection -- collection locator
path -- where to extract: absolute, or relative to job tmp
"""
+ from arvados import current_job
+ from arvados.collection import CollectionReader
matches = re.search(r'^([0-9a-f]+)(\+[\w@]+)*$', collection)
if matches:
collection_hash = matches.group(1)
else:
collection_hash = hashlib.md5(collection).hexdigest()
if not re.search('^/', path):
- path = os.path.join(arvados.current_job().tmpdir, path)
+ path = os.path.join(current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
@@ -273,6 +341,7 @@ def collection_extract(collection, path, files=[], decompress=True):
lockfile.close()
return path
+ at _deprecated('3.0', 'pathlib.Path().mkdir(parents=True, exist_ok=True)')
def mkdir_dash_p(path):
if not os.path.isdir(path):
try:
@@ -285,6 +354,7 @@ def mkdir_dash_p(path):
else:
raise
+ at _deprecated('3.0', 'arvados.collection.Collection')
def stream_extract(stream, path, files=[], decompress=True):
"""Retrieve a stream from Keep and extract it to a local
directory. Return the absolute path where the stream was
@@ -293,8 +363,9 @@ def stream_extract(stream, path, files=[], decompress=True):
stream -- StreamReader object
path -- where to extract: absolute, or relative to job tmp
"""
+ from arvados import current_job
if not re.search('^/', path):
- path = os.path.join(arvados.current_job().tmpdir, path)
+ path = os.path.join(current_job().tmpdir, path)
lockfile = open(path + '.lock', 'w')
fcntl.flock(lockfile, fcntl.LOCK_EX)
try:
@@ -325,6 +396,7 @@ def stream_extract(stream, path, files=[], decompress=True):
lockfile.close()
return path
+ at _deprecated('3.0', 'os.walk')
def listdir_recursive(dirname, base=None, max_depth=None):
"""listdir_recursive(dirname, base, max_depth)
@@ -376,6 +448,7 @@ def is_hex(s, *length_args):
good_len = True
return bool(good_len and HEX_RE.match(s))
+ at _deprecated('3.0', 'arvados.util.keyset_list_all')
def list_all(fn, num_retries=0, **kwargs):
# Default limit to (effectively) api server's MAX_LIMIT
kwargs.setdefault('limit', sys.maxsize)
@@ -456,7 +529,6 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
nextpage = [[order_key, ">=" if ascending else "<=", lastitem[order_key]], ["uuid", "!=", lastitem["uuid"]]]
prev_page_all_same_order_key = False
-
def ca_certs_path(fallback=httplib2.CA_CERTS):
"""Return the path of the best available CA certs source.
commit e78c96c45d3021120384c12286b7208a53358f6e
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 23:59:21 2023 -0400
20839: Add order_key to select in keyset_list_all
Like uuid, the main loop requires this field.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 5bce8d3f83..2f145e7343 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -397,8 +397,14 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
other_filters = kwargs.get("filters", [])
- if "select" in kwargs and "uuid" not in kwargs["select"]:
- kwargs["select"].append("uuid")
+ try:
+ select = set(kwargs['select'])
+ except KeyError:
+ pass
+ else:
+ select.add(order_key)
+ select.add('uuid')
+ kwargs['select'] = list(select)
nextpage = []
tot = 0
diff --git a/sdk/python/tests/test_util.py b/sdk/python/tests/test_util.py
index 4dba9ce3dc..f111abcf4a 100644
--- a/sdk/python/tests/test_util.py
+++ b/sdk/python/tests/test_util.py
@@ -2,10 +2,14 @@
#
# SPDX-License-Identifier: Apache-2.0
+import itertools
import os
+import parameterized
import subprocess
import unittest
+from unittest import mock
+
import arvados
import arvados.util
@@ -54,6 +58,12 @@ class KeysetTestHelper:
self.n += 1
return self.expect[self.n-1][1]
+_SELECT_FAKE_ITEM = {
+ 'uuid': 'zzzzz-zyyyz-zzzzzyyyyywwwww',
+ 'name': 'KeysetListAllTestCase.test_select mock',
+ 'created_at': '2023-08-28T12:34:56.123456Z',
+}
+
class KeysetListAllTestCase(unittest.TestCase):
def test_empty(self):
ks = KeysetTestHelper([[
@@ -163,7 +173,6 @@ class KeysetListAllTestCase(unittest.TestCase):
ls = list(arvados.util.keyset_list_all(ks.fn, filters=[["foo", ">", "bar"]]))
self.assertEqual(ls, [{"created_at": "1", "uuid": "1"}, {"created_at": "2", "uuid": "2"}])
-
def test_onepage_desc(self):
ks = KeysetTestHelper([[
{"limit": 1000, "count": "none", "order": ["created_at desc", "uuid desc"], "filters": []},
@@ -175,3 +184,35 @@ class KeysetListAllTestCase(unittest.TestCase):
ls = list(arvados.util.keyset_list_all(ks.fn, ascending=False))
self.assertEqual(ls, [{"created_at": "2", "uuid": "2"}, {"created_at": "1", "uuid": "1"}])
+
+ @parameterized.parameterized.expand(zip(
+ itertools.cycle(_SELECT_FAKE_ITEM),
+ itertools.chain.from_iterable(
+ itertools.combinations(_SELECT_FAKE_ITEM, count)
+ for count in range(len(_SELECT_FAKE_ITEM) + 1)
+ ),
+ ))
+ def test_select(self, order_key, select):
+ # keyset_list_all must have both uuid and order_key to function.
+ # Test that it selects those fields along with user-specified ones.
+ expect_select = {'uuid', order_key, *select}
+ item = {
+ key: value
+ for key, value in _SELECT_FAKE_ITEM.items()
+ if key in expect_select
+ }
+ list_func = mock.Mock()
+ list_func().execute = mock.Mock(
+ side_effect=[
+ {'items': [item]},
+ {'items': []},
+ {'items': []},
+ ],
+ )
+ list_func.reset_mock()
+ actual = list(arvados.util.keyset_list_all(list_func, order_key, select=list(select)))
+ self.assertEqual(actual, [item])
+ calls = list_func.call_args_list
+ self.assertTrue(len(calls) >= 2, "list_func() not called enough to exhaust items")
+ for call in calls:
+ self.assertEqual(set(call.kwargs.get('select', ())), expect_select)
commit e4842b4e0901470a984f5b9244a8b89bb7726f42
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 22:52:13 2023 -0400
20839: Replace arvados.util.mkdir_dash_p calls throughout
In preparation to deprecate the function.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index a7f3837599..c51be82b20 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -19,6 +19,7 @@ import httplib2
import json
import logging
import os
+import pathlib
import re
import socket
import ssl
@@ -173,15 +174,16 @@ def _new_http_error(cls, *args, **kwargs):
apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
def http_cache(data_type):
- homedir = os.environ.get('HOME')
- if not homedir or len(homedir) == 0:
+ try:
+ homedir = pathlib.Path.home()
+ except RuntimeError:
return None
- path = homedir + '/.cache/arvados/' + data_type
+ path = pathlib.Path(homedir, '.cache', 'arvados', data_type)
try:
- util.mkdir_dash_p(path)
+ path.mkdir(parents=True, exist_ok=True)
except OSError:
return None
- return cache.SafeHTTPCache(path, max_age=60*60*24*2)
+ return cache.SafeHTTPCache(str(path), max_age=60*60*24*2)
def api_client(
version,
diff --git a/sdk/python/arvados/commands/get.py b/sdk/python/arvados/commands/get.py
index c4db072cc2..b37a8477ac 100755
--- a/sdk/python/arvados/commands/get.py
+++ b/sdk/python/arvados/commands/get.py
@@ -6,6 +6,7 @@
import argparse
import hashlib
import os
+import pathlib
import re
import string
import sys
@@ -261,7 +262,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
logger.error('Local file %s already exists.' % (outfilename,))
return 1
if args.r:
- arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+ pathlib.Path(outfilename).parent.mkdir(parents=True, exist_ok=True)
try:
outfile = open(outfilename, 'wb')
except Exception as error:
commit 2f5d32c82d865c6760fd857a2bef235592f4e4a8
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 22:01:32 2023 -0400
20839: Replace arvados.util.list_all calls throughout
In preparation to deprecate this function. This does leave a couple
calls in arv-docker-migrate19, but that command will also be deprecated
and removed at least as soon as the function.
The changes to the the sdk/cwl/tests/test_submit.py mocks are necessary
because of mechanical differences between list_all and keyset_list_all.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
test fixup
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py
index d415be8856..9dad245254 100644
--- a/sdk/cwl/tests/test_submit.py
+++ b/sdk/cwl/tests/test_submit.py
@@ -10,6 +10,7 @@ from future.utils import viewvalues
import copy
import io
+import itertools
import functools
import hashlib
import json
@@ -1047,43 +1048,37 @@ class TestSubmit(unittest.TestCase):
api.return_value = mock.MagicMock()
arvrunner.api = api.return_value
arvrunner.runtimeContext.match_local_docker = False
- arvrunner.api.links().list().execute.side_effect = ({"items": [{"created_at": "",
- "head_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
- "link_class": "docker_image_repo+tag",
- "name": "arvados/jobs:"+arvados_cwl.__version__,
- "owner_uuid": "",
- "properties": {"image_timestamp": ""}}], "items_available": 1, "offset": 0},
- {"items": [{"created_at": "",
- "head_uuid": "",
- "link_class": "docker_image_hash",
- "name": "123456",
- "owner_uuid": "",
- "properties": {"image_timestamp": ""}}], "items_available": 1, "offset": 0},
- {"items": [{"created_at": "",
- "head_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
- "link_class": "docker_image_repo+tag",
- "name": "arvados/jobs:"+arvados_cwl.__version__,
- "owner_uuid": "",
- "properties": {"image_timestamp": ""}}], "items_available": 1, "offset": 0},
- {"items": [{"created_at": "",
- "head_uuid": "",
- "link_class": "docker_image_hash",
- "name": "123456",
- "owner_uuid": "",
- "properties": {"image_timestamp": ""}}], "items_available": 1, "offset": 0}
- )
+ arvrunner.api.links().list().execute.side_effect = itertools.cycle([
+ {"items": [{"created_at": "2023-08-25T12:34:56.123456Z",
+ "head_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
+ "link_class": "docker_image_repo+tag",
+ "name": "arvados/jobs:"+arvados_cwl.__version__,
+ "owner_uuid": "",
+ "uuid": "zzzzz-o0j2j-arvadosjobsrepo",
+ "properties": {"image_timestamp": ""}}]},
+ {"items": []},
+ {"items": []},
+ {"items": [{"created_at": "2023-08-25T12:34:57.234567Z",
+ "head_uuid": "",
+ "link_class": "docker_image_hash",
+ "name": "123456",
+ "owner_uuid": "",
+ "uuid": "zzzzz-o0j2j-arvadosjobshash",
+ "properties": {"image_timestamp": ""}}]},
+ {"items": []},
+ {"items": []},
+ ])
find_one_image_hash.return_value = "123456"
- arvrunner.api.collections().list().execute.side_effect = ({"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
- "owner_uuid": "",
- "manifest_text": "",
- "properties": ""
- }], "items_available": 1, "offset": 0},
- {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
- "owner_uuid": "",
- "manifest_text": "",
- "properties": ""
- }], "items_available": 1, "offset": 0})
+ arvrunner.api.collections().list().execute.side_effect = itertools.cycle([
+ {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
+ "owner_uuid": "",
+ "manifest_text": "",
+ "created_at": "2023-08-25T12:34:55.012345Z",
+ "properties": {}}]},
+ {"items": []},
+ {"items": []},
+ ])
arvrunner.api.collections().create().execute.return_value = {"uuid": ""}
arvrunner.api.collections().get().execute.return_value = {"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzb",
"portable_data_hash": "9999999999999999999999999999999b+99"}
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index 0a9696b13c..10fe9d7024 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -706,20 +706,20 @@ def copy_project(obj_uuid, src, dst, owner_uuid, args):
# Copy collections
try:
- copy_collections([col["uuid"] for col in arvados.util.list_all(src.collections().list, filters=[["owner_uuid", "=", obj_uuid]])],
+ copy_collections([col["uuid"] for col in arvados.util.keyset_list_all(src.collections().list, filters=[["owner_uuid", "=", obj_uuid]])],
src, dst, args)
except Exception as e:
partial_error += "\n" + str(e)
# Copy workflows
- for w in arvados.util.list_all(src.workflows().list, filters=[["owner_uuid", "=", obj_uuid]]):
+ for w in arvados.util.keyset_list_all(src.workflows().list, filters=[["owner_uuid", "=", obj_uuid]]):
try:
copy_workflow(w["uuid"], src, dst, args)
except Exception as e:
partial_error += "\n" + "Error while copying %s: %s" % (w["uuid"], e)
if args.recursive:
- for g in arvados.util.list_all(src.groups().list, filters=[["owner_uuid", "=", obj_uuid]]):
+ for g in arvados.util.keyset_list_all(src.groups().list, filters=[["owner_uuid", "=", obj_uuid]]):
try:
copy_project(g["uuid"], src, dst, project_record["uuid"], args)
except Exception as e:
diff --git a/sdk/python/arvados/commands/federation_migrate.py b/sdk/python/arvados/commands/federation_migrate.py
index 3090ff8f9a..770e1609db 100755
--- a/sdk/python/arvados/commands/federation_migrate.py
+++ b/sdk/python/arvados/commands/federation_migrate.py
@@ -97,13 +97,12 @@ def fetch_users(clusters, loginCluster):
by_email = {}
by_username = {}
- users = []
- for c, arv in clusters.items():
- print("Getting user list from %s" % c)
- ul = arvados.util.list_all(arv.users().list, bypass_federation=True)
- for l in ul:
- if l["uuid"].startswith(c):
- users.append(l)
+ users = [
+ user
+ for prefix, arv in clusters.items()
+ for user in arvados.util.keyset_list_all(arv.users().list, bypass_federation=True)
+ if user['uuid'].startswith(prefix)
+ ]
# Users list is sorted by email
# Go through users and collect users with same email
diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 922256a27e..4c5b13aa17 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -240,8 +240,9 @@ def docker_link_sort_key(link):
return (image_timestamp, created_timestamp)
def _get_docker_links(api_client, num_retries, **kwargs):
- links = arvados.util.list_all(api_client.links().list,
- num_retries, **kwargs)
+ links = list(arvados.util.keyset_list_all(
+ api_client.links().list, num_retries=num_retries, **kwargs,
+ ))
for link in links:
link['_sort_key'] = docker_link_sort_key(link)
links.sort(key=itemgetter('_sort_key'), reverse=True)
@@ -340,10 +341,12 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None,
images.sort(key=itemgetter('_sort_key'), reverse=True)
# Remove any image listings that refer to unknown collections.
- existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
- api_client.collections().list, num_retries,
- filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter,
- select=['uuid'])}
+ existing_coll_uuids = {coll['uuid'] for coll in arvados.util.keyset_list_all(
+ api_client.collections().list,
+ num_retries=num_retries,
+ filters=[['uuid', 'in', [im['collection'] for im in images]]]+project_filter,
+ select=['uuid'],
+ )}
return [(image['collection'], image) for image in images
if image['collection'] in existing_coll_uuids]
commit b1fb83ab538f6690822c16e5d08264278cb5942b
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 21:55:39 2023 -0400
20839: Optimize list sort
Trivial optimization I noticed while working around here.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/commands/federation_migrate.py b/sdk/python/arvados/commands/federation_migrate.py
index 32b3211f14..3090ff8f9a 100755
--- a/sdk/python/arvados/commands/federation_migrate.py
+++ b/sdk/python/arvados/commands/federation_migrate.py
@@ -111,7 +111,7 @@ def fetch_users(clusters, loginCluster):
# call add_accum_rows() to generate the report rows with
# the "home cluster" set, and also fill in the by_email table.
- users = sorted(users, key=lambda u: u["email"]+"::"+(u["username"] or "")+"::"+u["uuid"])
+ users.sort(key=lambda u: (u["email"], u["username"] or "", u["uuid"]))
accum = []
lastemail = None
commit 31c39c3ea1729dc555d2d2bcc210dfd9b6ed6d70
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 21:42:20 2023 -0400
20839: Replace arvados.util.run_command calls in arv-copy
In preparation for deprecating this function.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index 63c0cbea28..0a9696b13c 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -30,6 +30,7 @@ import getpass
import os
import re
import shutil
+import subprocess
import sys
import logging
import tempfile
@@ -224,8 +225,12 @@ def api_for_instance(instance_name, num_retries):
# Check if git is available
def check_git_availability():
try:
- arvados.util.run_command(['git', '--help'])
- except Exception:
+ subprocess.run(
+ ['git', '--version'],
+ check=True,
+ stdout=subprocess.DEVNULL,
+ )
+ except FileNotFoundError:
abort('git command is not available. Please ensure git is installed.')
@@ -612,8 +617,6 @@ def select_git_url(api, repo_name, retries, allow_insecure_http, allow_insecure_
priority = https_url + other_url + http_url
- git_config = []
- git_url = None
for url in priority:
if url.startswith("http"):
u = urllib.parse.urlsplit(url)
@@ -625,17 +628,22 @@ def select_git_url(api, repo_name, retries, allow_insecure_http, allow_insecure_
try:
logger.debug("trying %s", url)
- arvados.util.run_command(["git"] + git_config + ["ls-remote", url],
- env={"HOME": os.environ["HOME"],
- "ARVADOS_API_TOKEN": api.api_token,
- "GIT_ASKPASS": "/bin/false"})
- except arvados.errors.CommandFailedError:
+ subprocess.run(
+ ['git', *git_config, 'ls-remote', url],
+ check=True,
+ env={
+ 'ARVADOS_API_TOKEN': api.api_token,
+ 'GIT_ASKPASS': '/bin/false',
+ 'HOME': os.environ['HOME'],
+ },
+ stdout=subprocess.DEVNULL,
+ )
+ except subprocess.CalledProcessError:
pass
else:
git_url = url
break
-
- if not git_url:
+ else:
raise Exception('Cannot access git repository, tried {}'
.format(priority))
@@ -728,9 +736,14 @@ def copy_project(obj_uuid, src, dst, owner_uuid, args):
# repository)
#
def git_rev_parse(rev, repo):
- gitout, giterr = arvados.util.run_command(
- ['git', 'rev-parse', rev], cwd=repo)
- return gitout.strip()
+ proc = subprocess.run(
+ ['git', 'rev-parse', rev],
+ check=True,
+ cwd=repo,
+ stdout=subprocess.PIPE,
+ text=True,
+ )
+ return proc.stdout.read().strip()
# uuid_type(api, object_uuid)
#
commit 497c6e358e3dac2e44456d319e81fa130e7479c7
Author: Brett Smith <brett.smith at curii.com>
Date: Sat Aug 26 19:32:53 2023 -0400
20839: Remove old Python 2 TextIOWrapper shim
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py
index a816d2d0e2..5e49d883a2 100644
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -37,21 +37,6 @@ from arvados.retry import retry_method
_logger = logging.getLogger('arvados.collection')
-
-if sys.version_info >= (3, 0):
- TextIOWrapper = io.TextIOWrapper
-else:
- class TextIOWrapper(io.TextIOWrapper):
- """To maintain backward compatibility, cast str to unicode in
- write('foo').
-
- """
- def write(self, data):
- if isinstance(data, basestring):
- data = unicode(data)
- return super(TextIOWrapper, self).write(data)
-
-
class CollectionBase(object):
"""Abstract base class for Collection classes."""
@@ -721,7 +706,7 @@ class RichCollectionBase(CollectionBase):
f = fclass(arvfile, mode=binmode, num_retries=self.num_retries)
if 'b' not in mode:
bufferclass = io.BufferedRandom if f.writable() else io.BufferedReader
- f = TextIOWrapper(bufferclass(WrappableFile(f)), encoding=encoding)
+ f = io.TextIOWrapper(bufferclass(WrappableFile(f)), encoding=encoding)
return f
def modified(self):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list