[arvados] created: 2.7.0-6638-gf015f4ce8a
git repository hosting
git at public.arvados.org
Fri May 24 21:03:07 UTC 2024
at f015f4ce8ac8d3f7334952e3b76713e9400c8536 (commit)
commit f015f4ce8ac8d3f7334952e3b76713e9400c8536
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 16:39:29 2024 -0400
21020: Remove the make_block_cache test utility function
Clearing the user's cache is pretty rude for the tests to do, even if
it's "safe."
This causes the CollectionWriter tests around replication to
fail. They're not worth fixing since there's already a branch for #15397
to remove them wholesale.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/tests/arvados_testutil.py b/sdk/python/tests/arvados_testutil.py
index 1f1f796a9c..f27954e9fe 100644
--- a/sdk/python/tests/arvados_testutil.py
+++ b/sdk/python/tests/arvados_testutil.py
@@ -280,14 +280,6 @@ def binary_compare(a, b):
return False
return True
-def make_block_cache(disk_cache):
- if disk_cache:
- disk_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "arvados", "keep")
- shutil.rmtree(disk_cache_dir, ignore_errors=True)
- block_cache = arvados.keep.KeepBlockCache(disk_cache=disk_cache)
- return block_cache
-
-
class DiskCacheBase:
def make_block_cache(self, disk_cache):
self.disk_cache_dir = tempfile.mkdtemp() if disk_cache else None
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index c6b8ed1600..45e6056d19 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -8,7 +8,9 @@ import datetime
import os
import random
import re
+import shutil
import sys
+import tempfile
import time
import unittest
@@ -16,9 +18,9 @@ import parameterized
from unittest import mock
import arvados
+import arvados.keep
from arvados.collection import Collection, CollectionReader
from arvados._ranges import Range, LocatorAndRange
-from .arvados_testutil import make_block_cache
from . import arvados_testutil as tutil
from . import run_test_server
@@ -29,6 +31,7 @@ class TestResumableWriter(arvados.ResumableCollectionWriter):
def current_state(self):
return self.dump_state(copy.deepcopy)
+
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
tutil.ArvadosBaseTestCase):
@@ -40,10 +43,23 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
super(ArvadosCollectionsTest, cls).setUpClass()
# need admin privileges to make collections with unsigned blocks
run_test_server.authorize_with('admin')
+ if cls.disk_cache:
+ cls._disk_cache_dir = tempfile.mkdtemp(prefix='CollectionsTest-')
+ else:
+ cls._disk_cache_dir = None
+ block_cache = arvados.keep.KeepBlockCache(
+ disk_cache=cls.disk_cache,
+ disk_cache_dir=cls._disk_cache_dir,
+ )
cls.api_client = arvados.api('v1')
cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
local_store=cls.local_store,
- block_cache=make_block_cache(cls.disk_cache))
+ block_cache=block_cache)
+
+ @classmethod
+ def tearDownClass(cls):
+ if cls._disk_cache_dir:
+ shutil.rmtree(cls._disk_cache_dir)
def write_foo_bar_baz(self):
cw = arvados.CollectionWriter(self.api_client)
@@ -650,7 +666,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
self.assertRaises(IOError, reader.open, 'nonexistent')
-@tutil.skip_sleep
+@unittest.skip("will be removed in #15397")
class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
def mock_keep(self, body, *codes, **headers):
headers.setdefault('x-keep-replicas-stored', 2)
diff --git a/services/fuse/tests/mount_test_base.py b/services/fuse/tests/mount_test_base.py
index 9768aeb74d..d69cdf1c1a 100644
--- a/services/fuse/tests/mount_test_base.py
+++ b/services/fuse/tests/mount_test_base.py
@@ -26,16 +26,20 @@ from .integration_test import workerPool
logger = logging.getLogger('arvados.arv-mount')
-def make_block_cache(disk_cache):
- if disk_cache:
- disk_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "arvados", "keep")
- shutil.rmtree(disk_cache_dir, ignore_errors=True)
- block_cache = arvados.keep.KeepBlockCache(disk_cache=disk_cache)
- return block_cache
-
class MountTestBase(unittest.TestCase):
disk_cache = False
+ @classmethod
+ def setUpClass(cls):
+ if cls.disk_cache:
+ cls._disk_cache_dir = tempfile.mkdtemp(prefix='MountTest-')
+ else:
+ cls._disk_cache_dir = None
+ cls._keep_block_cache = arvados.keep.KeepBlockCache(
+ disk_cache=cls.disk_cache,
+ disk_cache_dir=cls._disk_cache_dir,
+ )
+
def setUp(self, api=None, local_store=True):
# The underlying C implementation of open() makes a fstat() syscall
# with the GIL still held. When the GETATTR message comes back to
@@ -57,11 +61,16 @@ class MountTestBase(unittest.TestCase):
self.api = api if api else arvados.safeapi.ThreadSafeApiCache(
arvados.config.settings(),
- keep_params={"block_cache": make_block_cache(self.disk_cache)},
+ keep_params={"block_cache": self._keep_block_cache},
version='v1',
)
self.llfuse_thread = None
+ @classmethod
+ def tearDownClass(cls):
+ if cls._disk_cache_dir:
+ shutil.rmtree(cls._disk_cache_dir)
+
# This is a copy of Mount's method. TODO: Refactor MountTestBase
# to use a Mount instead of copying its code.
def _llfuse_main(self):
commit 8aaf1905d3bf6b779f637023f66c8d7547524031
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 15:54:15 2024 -0400
21020: Sort imports to follow PEP 8
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 5d574856dd..c6b8ed1600 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -2,26 +2,27 @@
#
# SPDX-License-Identifier: Apache-2.0
-import arvados
+import ciso8601
import copy
+import datetime
import os
import random
import re
import sys
-import datetime
-import ciso8601
import time
import unittest
-import parameterized
+import parameterized
from unittest import mock
-from . import run_test_server
-from arvados._ranges import Range, LocatorAndRange
+import arvados
from arvados.collection import Collection, CollectionReader
-from . import arvados_testutil as tutil
+from arvados._ranges import Range, LocatorAndRange
from .arvados_testutil import make_block_cache
+from . import arvados_testutil as tutil
+from . import run_test_server
+
class TestResumableWriter(arvados.ResumableCollectionWriter):
KEEP_BLOCK_SIZE = 1024 # PUT to Keep every 1K.
commit 0ea1f7c1e36d7462ccd483086f305954152cb966
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 14:55:45 2024 -0400
21020: Update arv-put to use cache directories from environment
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index 38c5302b2d..ce2e5375b7 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -26,6 +26,8 @@ import threading
import time
import traceback
+from pathlib import Path
+
from apiclient import errors as apiclient_errors
from arvados._version import __version__
@@ -351,7 +353,7 @@ class ArvPutLogFormatter(logging.Formatter):
class ResumeCache(object):
- CACHE_DIR = '.cache/arvados/arv-put'
+ CACHE_DIR = 'arv-put'
def __init__(self, file_spec):
self.cache_file = open(file_spec, 'a+')
@@ -368,9 +370,14 @@ class ResumeCache(object):
md5.update(b'-1')
elif args.filename:
md5.update(args.filename.encode())
- return os.path.join(
- arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'),
- md5.hexdigest())
+ cache_path = Path(cls.CACHE_DIR)
+ if len(cache_path.parts) == 1:
+ cache_path = arvados.util._BaseDirectories('CACHE').storage_path(cache_path)
+ else:
+ # Note this is a noop if cache_path is absolute, which is what we want.
+ cache_path = Path.home() / cache_path
+ cache_path.mkdir(parents=True, exist_ok=True, mode=0o700)
+ return str(cache_path / md5.hexdigest())
def _lock_file(self, fileobj):
try:
@@ -433,7 +440,7 @@ class ResumeCache(object):
class ArvPutUploadJob(object):
- CACHE_DIR = '.cache/arvados/arv-put'
+ CACHE_DIR = 'arv-put'
EMPTY_STATE = {
'manifest' : None, # Last saved manifest checkpoint
'files' : {} # Previous run file list: {path : {size, mtime}}
@@ -859,11 +866,14 @@ class ArvPutUploadJob(object):
md5.update(b'\0'.join([p.encode() for p in realpaths]))
if self.filename:
md5.update(self.filename.encode())
- cache_filename = md5.hexdigest()
- cache_filepath = os.path.join(
- arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
- cache_filename)
- return cache_filepath
+ cache_path = Path(self.CACHE_DIR)
+ if len(cache_path.parts) == 1:
+ cache_path = arvados.util._BaseDirectories('CACHE').storage_path(cache_path)
+ else:
+ # Note this is a noop if cache_path is absolute, which is what we want.
+ cache_path = Path.home() / cache_path
+ cache_path.mkdir(parents=True, exist_ok=True, mode=0o700)
+ return str(cache_path / md5.hexdigest())
def _setup_state(self, update_collection):
"""
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index e8a3e65bdc..3b8269f99c 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -25,11 +25,14 @@ import time
import unittest
import uuid
+import pytest
from functools import partial
+from pathlib import Path
from unittest import mock
import arvados
import arvados.commands.put as arv_put
+import arvados.util
from . import arvados_testutil as tutil
from .arvados_testutil import ArvadosBaseTestCase, fake_httplib2_response
@@ -245,6 +248,76 @@ class ArvadosPutResumeCacheTest(ArvadosBaseTestCase):
arv_put.ResumeCache, path)
+class TestArvadosPutResumeCacheDir:
+ @pytest.fixture
+ def args(self, tmp_path):
+ return arv_put.parse_arguments([str(tmp_path)])
+
+ @pytest.mark.parametrize('cache_dir', [None, 'test-put'])
+ def test_cache_subdir(self, tmp_path, monkeypatch, cache_dir, args):
+ if cache_dir is None:
+ cache_dir = arv_put.ResumeCache.CACHE_DIR
+ else:
+ monkeypatch.setattr(arv_put.ResumeCache, 'CACHE_DIR', cache_dir)
+ monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', tmp_path.__truediv__)
+ actual = arv_put.ResumeCache.make_path(args)
+ assert isinstance(actual, str)
+ assert Path(actual).parent == (tmp_path / cache_dir)
+
+ def test_cache_relative_dir(self, tmp_path, monkeypatch, args):
+ expected = Path('rel', 'dir')
+ monkeypatch.setattr(Path, 'home', lambda: tmp_path)
+ monkeypatch.setattr(arv_put.ResumeCache, 'CACHE_DIR', str(expected))
+ actual = arv_put.ResumeCache.make_path(args)
+ assert isinstance(actual, str)
+ parent = Path(actual).parent
+ assert parent == (tmp_path / expected)
+ assert parent.is_dir()
+
+ def test_cache_absolute_dir(self, tmp_path, monkeypatch, args):
+ expected = tmp_path / 'arv-put'
+ monkeypatch.setattr(Path, 'home', lambda: tmp_path / 'home')
+ monkeypatch.setattr(arv_put.ResumeCache, 'CACHE_DIR', str(expected))
+ actual = arv_put.ResumeCache.make_path(args)
+ assert isinstance(actual, str)
+ parent = Path(actual).parent
+ assert parent == expected
+ assert parent.is_dir()
+
+
+class TestArvadosPutUploadJobCacheDir:
+ @pytest.mark.parametrize('cache_dir', [None, 'test-put'])
+ def test_cache_subdir(self, tmp_path, monkeypatch, cache_dir):
+ def storage_path(self, subdir='.', mode=0o700):
+ path = tmp_path / subdir
+ path.mkdir(mode=mode)
+ return path
+ if cache_dir is None:
+ cache_dir = arv_put.ArvPutUploadJob.CACHE_DIR
+ else:
+ monkeypatch.setattr(arv_put.ArvPutUploadJob, 'CACHE_DIR', cache_dir)
+ monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', storage_path)
+ job = arv_put.ArvPutUploadJob([str(tmp_path)], use_cache=True)
+ job.destroy_cache()
+ assert Path(job._cache_filename).parent == (tmp_path / cache_dir)
+
+ def test_cache_relative_dir(self, tmp_path, monkeypatch):
+ expected = Path('rel', 'dir')
+ monkeypatch.setattr(Path, 'home', lambda: tmp_path)
+ monkeypatch.setattr(arv_put.ArvPutUploadJob, 'CACHE_DIR', str(expected))
+ job = arv_put.ArvPutUploadJob([str(tmp_path)], use_cache=True)
+ job.destroy_cache()
+ assert Path(job._cache_filename).parent == (tmp_path / expected)
+
+ def test_cache_absolute_dir(self, tmp_path, monkeypatch):
+ expected = tmp_path / 'arv-put'
+ monkeypatch.setattr(Path, 'home', lambda: tmp_path / 'home')
+ monkeypatch.setattr(arv_put.ArvPutUploadJob, 'CACHE_DIR', str(expected))
+ job = arv_put.ArvPutUploadJob([str(tmp_path)], use_cache=True)
+ job.destroy_cache()
+ assert Path(job._cache_filename).parent == expected
+
+
class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
ArvadosBaseTestCase):
commit 53cc094bee6d9cd915fe4bb392ccf2a6a8ee955f
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 14:19:20 2024 -0400
21020: Generalize arv-put's arvados.util import
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index d1961c8c8a..38c5302b2d 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -28,8 +28,8 @@ import traceback
from apiclient import errors as apiclient_errors
from arvados._version import __version__
-from arvados.util import keep_locator_pattern
+import arvados.util
import arvados.commands._util as arv_cmd
api_client = None
@@ -942,7 +942,7 @@ class ArvPutUploadJob(object):
oldest_exp = None
oldest_loc = None
block_found = False
- for m in keep_locator_pattern.finditer(self._state['manifest']):
+ for m in arvados.util.keep_locator_pattern.finditer(self._state['manifest']):
loc = m.group(0)
try:
exp = datetime.datetime.utcfromtimestamp(int(loc.split('@')[1], 16))
commit c9888b00ddf2714cd93d6c34464eb93073a703f1
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 12:38:42 2024 -0400
21020: Add subdirectory argument to storage_path
A lot of users want this anyway so this is an easy way to DRY things
up.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 6b6ee36b3e..1116d6f155 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -169,8 +169,7 @@ def http_cache(data_type: str) -> cache.SafeHTTPCache:
where data is cached.
"""
try:
- path = util._BaseDirectories('CACHE').storage_path() / data_type
- path.mkdir(exist_ok=True)
+ path = util._BaseDirectories('CACHE').storage_path(data_type)
except (OSError, RuntimeError):
return None
else:
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 9a9b924dc6..a869099a65 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -176,9 +176,7 @@ class KeepBlockCache(object):
self._cache_updating = threading.Condition(self._cache_lock)
if self._disk_cache and self._disk_cache_dir is None:
- cache_path = arvados.util._BaseDirectories('CACHE').storage_path() / 'keep'
- cache_path.mkdir(mode=0o700, exist_ok=True)
- self._disk_cache_dir = str(cache_path)
+ self._disk_cache_dir = str(arvados.util._BaseDirectories('CACHE').storage_path('keep'))
if self._max_slots == 0:
if self._disk_cache:
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 8f7f06b753..a9d69c324a 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -236,16 +236,22 @@ class _BaseDirectories:
if path.exists():
yield path
- def storage_path(self) -> Path:
+ def storage_path(
+ self,
+ subdir: Union[str, os.PathLike]=PurePath(),
+ mode: int=0o700,
+ ) -> Path:
for path in self._spec.iter_systemd(self._env):
try:
mode = path.stat().st_mode
except OSError:
continue
if (mode & self._STORE_MODE) == self._STORE_MODE:
- return path
- path = self._spec.xdg_home(self._env, self._xdg_subdir)
- path.mkdir(parents=True, exist_ok=True)
+ break
+ else:
+ path = self._spec.xdg_home(self._env, self._xdg_subdir)
+ path /= subdir
+ path.mkdir(parents=True, exist_ok=True, mode=mode)
return path
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
index 33a19cbdbc..5241187f05 100644
--- a/sdk/python/tests/test_cache.py
+++ b/sdk/python/tests/test_cache.py
@@ -50,14 +50,17 @@ class CacheTestThread(threading.Thread):
class TestAPIHTTPCache:
@pytest.mark.parametrize('data_type', ['discovery', 'keep'])
def test_good_storage(self, tmp_path, monkeypatch, data_type):
- monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', lambda _: tmp_path)
+ def storage_path(self, subdir='.', mode=0o700):
+ path = tmp_path / subdir
+ path.mkdir(mode=mode)
+ return path
+ monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', storage_path)
actual = arvados.http_cache(data_type)
- assert actual is not None
- assert (tmp_path / data_type).is_dir()
+ assert str(actual) == str(tmp_path / data_type)
@pytest.mark.parametrize('error', [RuntimeError, FileExistsError, PermissionError])
def test_unwritable_storage(self, monkeypatch, error):
- def fail(self):
+ def fail(self, subdir='.', mode=0o700):
raise error()
monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', fail)
actual = arvados.http_cache('unwritable')
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index b3bdeae449..cc1df7821a 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -1473,12 +1473,11 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
@mock.patch('arvados.util._BaseDirectories.storage_path')
def test_default_disk_cache_dir(self, storage_path):
- expected = Path(self.disk_cache_dir, 'keep')
- storage_path.return_value = expected.parent
+ expected = Path(self.disk_cache_dir)
+ storage_path.return_value = expected
cache = arvados.keep.KeepBlockCache(disk_cache=True)
- storage_path.assert_called()
+ storage_path.assert_called_with('keep')
self.assertEqual(cache._disk_cache_dir, str(expected))
- self.assertTrue(expected.is_dir(), "cache did not create disk cache directory")
@mock.patch('arvados.KeepClient.KeepService.get')
def test_disk_cache_read(self, get_mock):
diff --git a/sdk/python/tests/test_util.py b/sdk/python/tests/test_util.py
index f3f01addd9..9f74d0205f 100644
--- a/sdk/python/tests/test_util.py
+++ b/sdk/python/tests/test_util.py
@@ -238,6 +238,14 @@ class TestBaseDirectories:
def env(self, tmp_path):
return {'HOME': str(tmp_path)}
+ @pytest.fixture
+ def umask(self):
+ orig_umask = os.umask(0o002)
+ try:
+ yield
+ finally:
+ os.umask(orig_umask)
+
def test_search_systemd_dirs(self, dir_spec, env, tmp_path):
env['TEST_DIRECTORY'] = f'{tmp_path}:{self.SELF_PATH.parent}'
dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
@@ -333,6 +341,20 @@ class TestBaseDirectories:
exp_mode = stat.S_IFDIR | stat.S_IWUSR
assert (expected.stat().st_mode & exp_mode) == exp_mode
+ @pytest.mark.parametrize('subdir,mode', [
+ ('str/dir', 0o750),
+ (Path('sub', 'path'), 0o770),
+ ])
+ def test_storage_path_subdir(self, dir_spec, env, umask, tmp_path, subdir, mode):
+ expected = tmp_path / dir_spec.xdg_home_default / 'arvados' / subdir
+ dirs = arvados.util._BaseDirectories(dir_spec, env)
+ actual = dirs.storage_path(subdir, mode)
+ assert actual == expected
+ expect_mode = mode | stat.S_IFDIR
+ actual_mode = actual.stat().st_mode
+ assert (actual_mode & expect_mode) == expect_mode
+ assert not (actual_mode & stat.S_IRWXO)
+
def test_empty_xdg_home(self, dir_spec, env, tmp_path):
env['XDG_TEST_HOME'] = ''
expected = tmp_path / dir_spec.xdg_home_default / 'emptyhome'
commit 6df1a6c15e83f6e00c675f95b1c77c2ef984e518
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 11:30:36 2024 -0400
21020: Get default Keep cache directory from environment
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index 5501b84271..9a9b924dc6 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -176,8 +176,9 @@ class KeepBlockCache(object):
self._cache_updating = threading.Condition(self._cache_lock)
if self._disk_cache and self._disk_cache_dir is None:
- self._disk_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "arvados", "keep")
- os.makedirs(self._disk_cache_dir, mode=0o700, exist_ok=True)
+ cache_path = arvados.util._BaseDirectories('CACHE').storage_path() / 'keep'
+ cache_path.mkdir(mode=0o700, exist_ok=True)
+ self._disk_cache_dir = str(cache_path)
if self._max_slots == 0:
if self._disk_cache:
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index 25e01b76b2..b3bdeae449 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -17,6 +17,7 @@ import time
import unittest
import urllib.parse
+from pathlib import Path
from unittest import mock
from unittest.mock import patch
@@ -1470,6 +1471,15 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
def tearDown(self):
shutil.rmtree(self.disk_cache_dir)
+ @mock.patch('arvados.util._BaseDirectories.storage_path')
+ def test_default_disk_cache_dir(self, storage_path):
+ expected = Path(self.disk_cache_dir, 'keep')
+ storage_path.return_value = expected.parent
+ cache = arvados.keep.KeepBlockCache(disk_cache=True)
+ storage_path.assert_called()
+ self.assertEqual(cache._disk_cache_dir, str(expected))
+ self.assertTrue(expected.is_dir(), "cache did not create disk cache directory")
+
@mock.patch('arvados.KeepClient.KeepService.get')
def test_disk_cache_read(self, get_mock):
# confirm it finds an existing cache block when the cache is
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 8004e8303f..dbd0dd0b5a 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -305,7 +305,7 @@ After this time, the mount will be forcefully unmounted.
cache.add_argument(
'--disk-cache-dir',
metavar="DIRECTORY",
- help="Filesystem cache location (default `~/.cache/arvados/keep`)",
+ help="Set custom filesystem cache location",
)
cache.add_argument(
'--directory-cache',
commit 0f92c494d1c75b884fbf7661df43246ad0902125
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 11:14:57 2024 -0400
21020: Sort imports to follow PEP 8
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index f1fb761560..25e01b76b2 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -2,26 +2,26 @@
#
# SPDX-License-Identifier: Apache-2.0
+import errno
import hashlib
+import mmap
import os
-import errno
-import pycurl
import random
import re
import shutil
import socket
-import sys
import stat
+import sys
import tempfile
import time
import unittest
import urllib.parse
-import mmap
from unittest import mock
from unittest.mock import patch
import parameterized
+import pycurl
import arvados
import arvados.retry
commit dc4647255991a1b1722baf2fdfd3f6993ee76912
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 11:12:50 2024 -0400
21020: Adjust spacing to follow PEP 8
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/tests/test_keep_client.py b/sdk/python/tests/test_keep_client.py
index 2dc4363f0c..f1fb761560 100644
--- a/sdk/python/tests/test_keep_client.py
+++ b/sdk/python/tests/test_keep_client.py
@@ -190,6 +190,7 @@ class KeepPermissionTestCase(run_test_server.TestCaseWithServers, DiskCacheBase)
keep_client.get,
unsigned_bar_locator)
+
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepProxyTestCase(run_test_server.TestCaseWithServers, DiskCacheBase):
disk_cache = False
@@ -240,6 +241,7 @@ class KeepProxyTestCase(run_test_server.TestCaseWithServers, DiskCacheBase):
local_store='',
block_cache=self.make_block_cache(self.disk_cache))
+
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCacheBase):
disk_cache = False
@@ -576,6 +578,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
self.assertEqual(pdh, actual)
self.assertEqual(1, req_mock.call_count)
+
@tutil.skip_sleep
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientCacheTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCacheBase):
@@ -619,9 +622,6 @@ class KeepClientCacheTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCacheB
self.assertNotEqual(head_resp, get_resp)
-
-
-
@tutil.skip_sleep
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepXRequestIdTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCacheBase):
@@ -875,6 +875,7 @@ class KeepClientRendezvousTestCase(unittest.TestCase, tutil.ApiClientMock, DiskC
def test_put_error_shows_probe_order(self):
self.check_64_zeros_error_order('put', arvados.errors.KeepWriteError)
+
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientTimeout(keepstub.StubKeepServers, unittest.TestCase, DiskCacheBase):
disk_cache = False
@@ -1022,6 +1023,7 @@ class KeepClientTimeout(keepstub.StubKeepServers, unittest.TestCase, DiskCacheBa
with self.assertRaises(arvados.errors.KeepWriteError):
kc.put(self.DATA, copies=1, num_retries=0)
+
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCacheBase):
disk_cache = False
@@ -1124,6 +1126,7 @@ class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock, DiskCach
self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
MockCurl.return_value.getopt(pycurl.URL).decode())
+
class KeepClientRetryTestMixin(object):
disk_cache = False
@@ -1244,6 +1247,7 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase, Di
(self.DEFAULT_EXPECT, 200)):
self.check_success(locator=self.HINTED_LOCATOR)
+
@tutil.skip_sleep
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientRetryHeadTestCase(KeepClientRetryTestMixin, unittest.TestCase, DiskCacheBase):
@@ -1286,6 +1290,7 @@ class KeepClientRetryHeadTestCase(KeepClientRetryTestMixin, unittest.TestCase, D
(self.DEFAULT_EXPECT, 200)):
self.check_success(locator=self.HINTED_LOCATOR)
+
@tutil.skip_sleep
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase, DiskCacheBase):
@@ -1306,7 +1311,6 @@ class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase, Di
class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
-
class FakeKeepService(object):
def __init__(self, delay, will_succeed=False, will_raise=None, replicas=1):
self.delay = delay
@@ -1333,6 +1337,7 @@ class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
def finished(self):
return False
+
def setUp(self):
self.copies = 3
self.pool = arvados.KeepClient.KeepWriterThreadPool(
@@ -1418,6 +1423,7 @@ class RetryNeedsMultipleServices(unittest.TestCase, tutil.ApiClientMock, DiskCac
self.keep_client.put('foo', num_retries=1, copies=2)
self.assertEqual(2, req_mock.call_count)
+
@parameterized.parameterized_class([{"disk_cache": True}, {"disk_cache": False}])
class KeepClientAPIErrorTest(unittest.TestCase, DiskCacheBase):
disk_cache = False
@@ -1464,7 +1470,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
def tearDown(self):
shutil.rmtree(self.disk_cache_dir)
-
@mock.patch('arvados.KeepClient.KeepService.get')
def test_disk_cache_read(self, get_mock):
# confirm it finds an existing cache block when the cache is
@@ -1483,7 +1488,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
get_mock.assert_not_called()
-
@mock.patch('arvados.KeepClient.KeepService.get')
def test_disk_cache_share(self, get_mock):
# confirm it finds a cache block written after the disk cache
@@ -1502,7 +1506,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
get_mock.assert_not_called()
-
def test_disk_cache_write(self):
# confirm the cache block was created
@@ -1518,7 +1521,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
with open(os.path.join(self.disk_cache_dir, self.locator[0:3], self.locator+".keepcacheblock"), "rb") as f:
self.assertTrue(tutil.binary_compare(f.read(), self.data))
-
def test_disk_cache_clean(self):
# confirm that a tmp file in the cache is cleaned up
@@ -1557,7 +1559,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
self.assertTrue(os.path.exists(os.path.join(self.disk_cache_dir, self.locator[0:3], "tmpXYZABC")))
self.assertTrue(os.path.exists(os.path.join(self.disk_cache_dir, self.locator[0:3], "XYZABC")))
-
@mock.patch('arvados.KeepClient.KeepService.get')
def test_disk_cache_cap(self, get_mock):
# confirm that the cache is kept to the desired limit
@@ -1580,7 +1581,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
self.assertFalse(os.path.exists(os.path.join(self.disk_cache_dir, self.locator[0:3], self.locator+".keepcacheblock")))
self.assertTrue(os.path.exists(os.path.join(self.disk_cache_dir, "acb", "acbd18db4cc2f85cedef654fccc4a4d8.keepcacheblock")))
-
@mock.patch('arvados.KeepClient.KeepService.get')
def test_disk_cache_share(self, get_mock):
# confirm that a second cache doesn't delete files that belong to the first cache.
@@ -1610,8 +1610,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
self.assertTrue(os.path.exists(os.path.join(self.disk_cache_dir, self.locator[0:3], self.locator+".keepcacheblock")))
self.assertTrue(os.path.exists(os.path.join(self.disk_cache_dir, "acb", "acbd18db4cc2f85cedef654fccc4a4d8.keepcacheblock")))
-
-
def test_disk_cache_error(self):
os.chmod(self.disk_cache_dir, stat.S_IRUSR)
@@ -1620,7 +1618,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
block_cache = arvados.keep.KeepBlockCache(disk_cache=True,
disk_cache_dir=self.disk_cache_dir)
-
def test_disk_cache_write_error(self):
block_cache = arvados.keep.KeepBlockCache(disk_cache=True,
disk_cache_dir=self.disk_cache_dir)
@@ -1636,7 +1633,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
with tutil.mock_keep_responses(self.data, 200) as mock:
keep_client.get(self.locator)
-
def test_disk_cache_retry_write_error(self):
block_cache = arvados.keep.KeepBlockCache(disk_cache=True,
disk_cache_dir=self.disk_cache_dir)
@@ -1669,7 +1665,6 @@ class KeepDiskCacheTestCase(unittest.TestCase, tutil.ApiClientMock):
# shrank the cache in response to ENOSPC
self.assertTrue(cache_max_before > block_cache.cache_max)
-
def test_disk_cache_retry_write_error2(self):
block_cache = arvados.keep.KeepBlockCache(disk_cache=True,
disk_cache_dir=self.disk_cache_dir)
commit 7af8f90cfe8c13323b3e3ec6f2ed4bbdf18d2520
Author: Brett Smith <brett.smith at curii.com>
Date: Fri May 24 11:04:03 2024 -0400
21020: Make arv-keepdocker use a cache from the environment
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/commands/keepdocker.py b/sdk/python/arvados/commands/keepdocker.py
index 6823ee1bea..188f8be457 100644
--- a/sdk/python/arvados/commands/keepdocker.py
+++ b/sdk/python/arvados/commands/keepdocker.py
@@ -18,6 +18,7 @@ import tempfile
import ciso8601
from operator import itemgetter
+from pathlib import Path
from stat import *
import arvados
@@ -27,6 +28,10 @@ import arvados.commands._util as arv_cmd
import arvados.commands.put as arv_put
from arvados._version import __version__
+from typing import (
+ Callable,
+)
+
logger = logging.getLogger('arvados.keepdocker')
logger.setLevel(logging.DEBUG if arvados.config.get('ARVADOS_DEBUG')
else logging.INFO)
@@ -181,9 +186,12 @@ def save_image(image_hash, image_file):
except STAT_CACHE_ERRORS:
pass # We won't resume from this cache. No big deal.
-def get_cache_dir():
- return arv_cmd.make_home_conf_dir(
- os.path.join('.cache', 'arvados', 'docker'), 0o700)
+def get_cache_dir(
+ mkparent: Callable[[], Path]=arvados.util._BaseDirectories('CACHE').storage_path,
+) -> str:
+ path = mkparent() / 'docker'
+ path.mkdir(mode=0o700, exist_ok=True)
+ return str(path)
def prep_image_file(filename):
# Return a file object ready to save a Docker image,
diff --git a/sdk/python/tests/test_arv_keepdocker.py b/sdk/python/tests/test_arv_keepdocker.py
index 5d23dfb378..c5bcfff41b 100644
--- a/sdk/python/tests/test_arv_keepdocker.py
+++ b/sdk/python/tests/test_arv_keepdocker.py
@@ -18,6 +18,7 @@ from pathlib import Path
from unittest import mock
import parameterized
+import pytest
import arvados.commands.keepdocker as arv_keepdocker
from . import arvados_testutil as tutil
@@ -255,3 +256,12 @@ class ImageMetadataTestCase(unittest.TestCase):
def test_image_config(self):
self.assertIsInstance(self.config, collections.abc.Mapping)
self.assertEqual(self.config.get('created'), '2023-05-02T16:49:27Z')
+
+
+def test_get_cache_dir(tmp_path):
+ actual = arv_keepdocker.get_cache_dir(lambda: tmp_path)
+ assert isinstance(actual, str)
+ actual = Path(actual)
+ assert actual.is_dir()
+ assert actual.name == 'docker'
+ assert actual.parent == tmp_path
commit 806072055c23ebbd9fcd4f595c6129f06e803c79
Author: Brett Smith <brett.smith at curii.com>
Date: Mon May 20 17:12:33 2024 -0400
21020: Make arvados.api.http_cache get a path from the environment
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index 8a17e42fcb..6b6ee36b3e 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -159,25 +159,22 @@ def http_cache(data_type: str) -> cache.SafeHTTPCache:
"""Set up an HTTP file cache
This function constructs and returns an `arvados.cache.SafeHTTPCache`
- backed by the filesystem under `~/.cache/arvados/`, or `None` if the
- directory cannot be set up. The return value can be passed to
+ backed by the filesystem under a cache directory from the environment, or
+ `None` if the directory cannot be set up. The return value can be passed to
`httplib2.Http` as the `cache` argument.
Arguments:
- * data_type: str --- The name of the subdirectory under `~/.cache/arvados`
+ * data_type: str --- The name of the subdirectory
where data is cached.
"""
try:
- homedir = pathlib.Path.home()
- except RuntimeError:
+ path = util._BaseDirectories('CACHE').storage_path() / data_type
+ path.mkdir(exist_ok=True)
+ except (OSError, RuntimeError):
return None
- path = pathlib.Path(homedir, '.cache', 'arvados', data_type)
- try:
- path.mkdir(parents=True, exist_ok=True)
- except OSError:
- return None
- return cache.SafeHTTPCache(str(path), max_age=60*60*24*2)
+ else:
+ return cache.SafeHTTPCache(str(path), max_age=60*60*24*2)
def api_client(
version: str,
@@ -211,8 +208,7 @@ def api_client(
Keyword-only arguments:
* cache: bool --- If true, loads the API discovery document from, or
- saves it to, a cache on disk (located at
- `~/.cache/arvados/discovery`).
+ saves it to, a cache on disk.
* http: httplib2.Http | None --- The HTTP client object the API client
object will use to make requests. If not provided, this function will
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
index 41984a5bf9..33a19cbdbc 100644
--- a/sdk/python/tests/test_cache.py
+++ b/sdk/python/tests/test_cache.py
@@ -11,10 +11,12 @@ import tempfile
import threading
import unittest
+import pytest
from unittest import mock
import arvados
import arvados.cache
+import arvados.util
from . import run_test_server
def _random(n):
@@ -45,6 +47,23 @@ class CacheTestThread(threading.Thread):
raise
+class TestAPIHTTPCache:
+ @pytest.mark.parametrize('data_type', ['discovery', 'keep'])
+ def test_good_storage(self, tmp_path, monkeypatch, data_type):
+ monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', lambda _: tmp_path)
+ actual = arvados.http_cache(data_type)
+ assert actual is not None
+ assert (tmp_path / data_type).is_dir()
+
+ @pytest.mark.parametrize('error', [RuntimeError, FileExistsError, PermissionError])
+ def test_unwritable_storage(self, monkeypatch, error):
+ def fail(self):
+ raise error()
+ monkeypatch.setattr(arvados.util._BaseDirectories, 'storage_path', fail)
+ actual = arvados.http_cache('unwritable')
+ assert actual is None
+
+
class CacheTest(unittest.TestCase):
def setUp(self):
self._dir = tempfile.mkdtemp()
@@ -52,17 +71,6 @@ class CacheTest(unittest.TestCase):
def tearDown(self):
shutil.rmtree(self._dir)
- def test_cache_create_error(self):
- _, filename = tempfile.mkstemp()
- home_was = os.environ['HOME']
- os.environ['HOME'] = filename
- try:
- c = arvados.http_cache('test')
- self.assertEqual(None, c)
- finally:
- os.environ['HOME'] = home_was
- os.unlink(filename)
-
def test_cache_crud(self):
c = arvados.cache.SafeHTTPCache(self._dir, max_age=0)
url = 'https://example.com/foo?bar=baz'
commit 2c5a3dbd6b56ca39d7b105b821f7061d2b102a56
Author: Brett Smith <brett.smith at curii.com>
Date: Mon May 20 16:26:21 2024 -0400
21020: Make arv-copy search from environment
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py
index eccf488efb..d8efa22556 100755
--- a/sdk/python/arvados/commands/arv_copy.py
+++ b/sdk/python/arvados/commands/arv_copy.py
@@ -13,9 +13,9 @@
# --no-recursive is given, arv-copy copies only the single record
# identified by object-uuid.
#
-# The user must have files $HOME/.config/arvados/{src}.conf and
-# $HOME/.config/arvados/{dst}.conf with valid login credentials for
-# instances src and dst. If either of these files is not found,
+# The user must have configuration files {src}.conf and
+# {dst}.conf in a standard configuration directory with valid login credentials
+# for instances src and dst. If either of these files is not found,
# arv-copy will issue an error.
import argparse
@@ -86,10 +86,10 @@ def main():
help='Perform copy even if the object appears to exist at the remote destination.')
copy_opts.add_argument(
'--src', dest='source_arvados',
- help='The cluster id of the source Arvados instance. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf. If not provided, will be inferred from the UUID of the object being copied.')
+ help='The cluster id of the source Arvados instance. May be either a pathname to a config file, or (for example) "foo" as shorthand for finding "foo.conf" under a systemd or XDG configuration directory. If not provided, will be inferred from the UUID of the object being copied.')
copy_opts.add_argument(
'--dst', dest='destination_arvados',
- help='The name of the destination Arvados instance (required). May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf. If not provided, will use ARVADOS_API_HOST from environment.')
+ help='The name of the destination Arvados instance. May be either a pathname to a config file, or (for example) "foo" as shorthand for finding "foo.conf" under a systemd or XDG configuration directory. If not provided, will use ARVADOS_API_HOST from environment.')
copy_opts.add_argument(
'--recursive', dest='recursive', action='store_true',
help='Recursively copy any dependencies for this object, and subprojects. (default)')
@@ -197,8 +197,8 @@ def set_src_owner_uuid(resource, uuid, args):
# (either local or absolute) to a file with Arvados configuration
# settings.
#
-# Otherwise, it is presumed to be the name of a file in
-# $HOME/.config/arvados/instance_name.conf
+# Otherwise, it is presumed to be the name of a file in a standard
+# configuration directory.
#
def api_for_instance(instance_name, num_retries):
if not instance_name:
@@ -208,16 +208,22 @@ def api_for_instance(instance_name, num_retries):
if '/' in instance_name:
config_file = instance_name
else:
- config_file = os.path.join(os.environ['HOME'], '.config', 'arvados', "{}.conf".format(instance_name))
+ dirs = arvados.util._BaseDirectories('CONFIG')
+ config_file = next(dirs.search(f'{instance_name}.conf'), '')
try:
cfg = arvados.config.load(config_file)
- except (IOError, OSError) as e:
- abort(("Could not open config file {}: {}\n" +
+ except OSError as e:
+ if config_file:
+ verb = 'open'
+ else:
+ verb = 'find'
+ config_file = f'{instance_name}.conf'
+ abort(("Could not {} config file {}: {}\n" +
"You must make sure that your configuration tokens\n" +
"for Arvados instance {} are in {} and that this\n" +
"file is readable.").format(
- config_file, e, instance_name, config_file))
+ verb, config_file, e.strerror, instance_name, config_file))
if 'ARVADOS_API_HOST' in cfg and 'ARVADOS_API_TOKEN' in cfg:
api_is_insecure = (
diff --git a/sdk/python/tests/test_arv_copy.py b/sdk/python/tests/test_arv_copy.py
index 1af5c68e6c..3007629121 100644
--- a/sdk/python/tests/test_arv_copy.py
+++ b/sdk/python/tests/test_arv_copy.py
@@ -2,14 +2,18 @@
#
# SPDX-License-Identifier: Apache-2.0
+import itertools
import os
import sys
import tempfile
import unittest
import shutil
import arvados.api
+import arvados.util
from arvados.collection import Collection, CollectionReader
+import pytest
+
import arvados.commands.arv_copy as arv_copy
from . import arvados_testutil as tutil
from . import run_test_server
@@ -87,3 +91,69 @@ class ArvCopyVersionTestCase(run_test_server.TestCaseWithServers, tutil.VersionC
finally:
os.environ['HOME'] = home_was
shutil.rmtree(tmphome)
+
+
+class TestApiForInstance:
+ _token_counter = itertools.count(1)
+
+ @staticmethod
+ def api_config(version, **kwargs):
+ assert version == 'v1'
+ return kwargs
+
+ @pytest.fixture
+ def patch_api(self, monkeypatch):
+ monkeypatch.setattr(arvados, 'api', self.api_config)
+
+ @pytest.fixture
+ def config_file(self, tmp_path):
+ count = next(self._token_counter)
+ path = tmp_path / f'config{count}.conf'
+ with path.open('w') as config_file:
+ print(
+ "ARVADOS_API_HOST=localhost",
+ f"ARVADOS_API_TOKEN={self.expected_token(path)}",
+ sep="\n", file=config_file,
+ )
+ return path
+
+ @pytest.fixture
+ def patch_search(self, tmp_path, monkeypatch):
+ def search(self, name):
+ path = tmp_path / name
+ if path.exists():
+ yield path
+ monkeypatch.setattr(arvados.util._BaseDirectories, 'search', search)
+
+ def expected_token(self, path):
+ return f"v2/zzzzz-gj3su-{path.stem:>015s}/{path.stem:>050s}"
+
+ def test_from_environ(self, patch_api):
+ actual = arv_copy.api_for_instance('', 0)
+ assert actual == {}
+
+ def test_relative_path(self, patch_api, config_file, monkeypatch):
+ monkeypatch.chdir(config_file.parent)
+ actual = arv_copy.api_for_instance(f'./{config_file.name}', 0)
+ assert actual['host'] == 'localhost'
+ assert actual['token'] == self.expected_token(config_file)
+
+ def test_absolute_path(self, patch_api, config_file):
+ actual = arv_copy.api_for_instance(str(config_file), 0)
+ assert actual['host'] == 'localhost'
+ assert actual['token'] == self.expected_token(config_file)
+
+ def test_search_path(self, patch_api, patch_search, config_file):
+ actual = arv_copy.api_for_instance(config_file.stem, 0)
+ assert actual['host'] == 'localhost'
+ assert actual['token'] == self.expected_token(config_file)
+
+ def test_search_failed(self, patch_api, patch_search):
+ with pytest.raises(SystemExit) as exc_info:
+ arv_copy.api_for_instance('NotFound', 0)
+ assert exc_info.value.code > 0
+
+ def test_path_unreadable(self, patch_api, tmp_path):
+ with pytest.raises(SystemExit) as exc_info:
+ arv_copy.api_for_instance(str(tmp_path / 'nonexistent.conf'), 0)
+ assert exc_info.value.code > 0
commit 1fd1708ba097c8c1c77a9f9063d1736341c03a3f
Author: Brett Smith <brett.smith at curii.com>
Date: Mon May 20 16:06:12 2024 -0400
21020: Make arvados.config search from environment
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/config.py b/sdk/python/arvados/config.py
index 6f3bd02790..9b2483bcfe 100644
--- a/sdk/python/arvados/config.py
+++ b/sdk/python/arvados/config.py
@@ -10,19 +10,38 @@
import os
import re
+from typing import (
+ Callable,
+ Iterable,
+ Union,
+)
+
+from . import util
+
_settings = None
-if os.environ.get('HOME') is not None:
- default_config_file = os.environ['HOME'] + '/.config/arvados/settings.conf'
-else:
- default_config_file = ''
+default_config_file = ''
+""".. WARNING: Deprecated
+ Default configuration initialization now searches for the "default"
+ configuration in several places. This value no longer has any effect.
+"""
KEEP_BLOCK_SIZE = 2**26
EMPTY_BLOCK_LOCATOR = 'd41d8cd98f00b204e9800998ecf8427e+0'
-def initialize(config_file=default_config_file):
+def initialize(
+ config_file: Union[
+ str,
+ os.PathLike,
+ Callable[[str], Iterable[os.PathLike]],
+ ]=util._BaseDirectories('CONFIG').search,
+) -> None:
global _settings
_settings = {}
+ if callable(config_file):
+ search_paths = iter(config_file('settings.conf'))
+ config_file = next(search_paths, '')
+
# load the specified config file if available
try:
_settings = load(config_file)
diff --git a/sdk/python/tests/test_config.py b/sdk/python/tests/test_config.py
new file mode 100644
index 0000000000..4b5bca2e82
--- /dev/null
+++ b/sdk/python/tests/test_config.py
@@ -0,0 +1,58 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+import pytest
+
+from arvados import config as arv_config
+
+class TestInitialize:
+ @pytest.fixture(autouse=True)
+ def setup(self, monkeypatch):
+ arv_config._settings = None
+ monkeypatch.delenv('ARVADOS_API_HOST', raising=False)
+ monkeypatch.delenv('ARVADOS_API_TOKEN', raising=False)
+ try:
+ yield
+ finally:
+ arv_config._settings = None
+
+ @pytest.fixture
+ def tmp_settings(self, tmp_path):
+ path = tmp_path / 'settings.conf'
+ with path.open('w') as settings_file:
+ print("ARVADOS_API_HOST=localhost", file=settings_file)
+ print("ARVADOS_API_TOKEN=TestInitialize", file=settings_file)
+ return path
+
+ def test_static_path(self, tmp_settings):
+ arv_config.initialize(tmp_settings)
+ actual = arv_config.settings()
+ assert actual['ARVADOS_API_HOST'] == 'localhost'
+ assert actual['ARVADOS_API_TOKEN'] == 'TestInitialize'
+
+ def test_search_path(self, tmp_settings):
+ def search(filename):
+ assert filename == tmp_settings.name
+ yield tmp_settings
+ arv_config.initialize(search)
+ actual = arv_config.settings()
+ assert actual['ARVADOS_API_HOST'] == 'localhost'
+ assert actual['ARVADOS_API_TOKEN'] == 'TestInitialize'
+
+ def test_default_search(self, tmp_settings, monkeypatch):
+ monkeypatch.setenv('CONFIGURATION_DIRECTORY', str(tmp_settings.parent))
+ monkeypatch.setenv('XDG_CONFIG_HOME', str(tmp_settings.parent))
+ monkeypatch.delenv('XDG_CONFIG_DIRS', raising=False)
+ actual = arv_config.settings()
+ assert actual['ARVADOS_API_HOST'] == 'localhost'
+ assert actual['ARVADOS_API_TOKEN'] == 'TestInitialize'
+
+ def test_environ_override(self, monkeypatch):
+ monkeypatch.setenv('ARVADOS_API_TOKEN', 'test_environ_override')
+ arv_config.initialize('')
+ actual = arv_config.settings()
+ assert actual.get('ARVADOS_API_HOST') is None
+ assert actual['ARVADOS_API_TOKEN'] == 'test_environ_override'
commit 5a1afcdb8fd97eb781513b74444fc5b492013d75
Author: Brett Smith <brett.smith at curii.com>
Date: Thu May 16 16:04:39 2024 -0400
21020: Introduce BaseDirectory classes to arvados.util
This is common functionality that will be used throughout the Python
tools to support base directory searching.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 050c67f68d..8f7f06b753 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -7,25 +7,32 @@ This module provides functions and constants that are useful across a variety
of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
"""
+import dataclasses
+import enum
import errno
import fcntl
import functools
import hashlib
import httplib2
+import itertools
import os
import random
import re
+import stat
import subprocess
import sys
import warnings
import arvados.errors
+from pathlib import Path, PurePath
from typing import (
Any,
Callable,
Dict,
Iterator,
+ Mapping,
+ Optional,
TypeVar,
Union,
)
@@ -126,6 +133,122 @@ def _deprecated(version=None, preferred=None):
return deprecated_wrapper
return deprecated_decorator
+ at dataclasses.dataclass
+class _BaseDirectorySpec:
+ """Parse base directories
+
+ A _BaseDirectorySpec defines all the environment variable keys and defaults
+ related to a set of base directories (cache, config, state, etc.). It
+ provides pure methods to parse environment settings into valid paths.
+ """
+ systemd_key: str
+ xdg_home_key: str
+ xdg_home_default: PurePath
+ xdg_dirs_key: Optional[str] = None
+ xdg_dirs_default: str = ''
+
+ @staticmethod
+ def _abspath_from_env(env: Mapping[str, str], key: str) -> Optional[Path]:
+ try:
+ path = Path(env[key])
+ except (KeyError, ValueError):
+ ok = False
+ else:
+ ok = path.is_absolute()
+ return path if ok else None
+
+ @staticmethod
+ def _iter_abspaths(value: str) -> Iterator[Path]:
+ for path_s in value.split(':'):
+ path = Path(path_s)
+ if path.is_absolute():
+ yield path
+
+ def iter_systemd(self, env: Mapping[str, str]) -> Iterator[Path]:
+ return self._iter_abspaths(env.get(self.systemd_key, ''))
+
+ def iter_xdg(self, env: Mapping[str, str], subdir: PurePath) -> Iterator[Path]:
+ yield self.xdg_home(env, subdir)
+ if self.xdg_dirs_key is not None:
+ for path in self._iter_abspaths(env.get(self.xdg_dirs_key) or self.xdg_dirs_default):
+ yield path / subdir
+
+ def xdg_home(self, env: Mapping[str, str], subdir: PurePath) -> Path:
+ home_path = self._abspath_from_env(env, self.xdg_home_key)
+ if home_path is None:
+ home_path = self._abspath_from_env(env, 'HOME') or Path.home()
+ home_path /= self.xdg_home_default
+ return home_path / subdir
+
+
+class _BaseDirectorySpecs(enum.Enum):
+ """Base directory specifications
+
+ This enum provides easy access to the standard base directory settings.
+ """
+ CACHE = _BaseDirectorySpec(
+ 'CACHE_DIRECTORY',
+ 'XDG_CACHE_HOME',
+ PurePath('.cache'),
+ )
+ CONFIG = _BaseDirectorySpec(
+ 'CONFIGURATION_DIRECTORY',
+ 'XDG_CONFIG_HOME',
+ PurePath('.config'),
+ 'XDG_CONFIG_DIRS',
+ '/etc/xdg',
+ )
+ STATE = _BaseDirectorySpec(
+ 'STATE_DIRECTORY',
+ 'XDG_STATE_HOME',
+ PurePath('.local', 'state'),
+ )
+
+
+class _BaseDirectories:
+ """Resolve paths from a base directory spec
+
+ Given a _BaseDirectorySpec, this class provides stateful methods to find
+ existing files and return the most-preferred directory for writing.
+ """
+ _STORE_MODE = stat.S_IFDIR | stat.S_IWUSR
+
+ def __init__(
+ self,
+ spec: Union[_BaseDirectorySpec, _BaseDirectorySpecs, str],
+ env: Mapping[str, str]=os.environ,
+ xdg_subdir: Union[os.PathLike, str]='arvados',
+ ) -> None:
+ if isinstance(spec, str):
+ spec = _BaseDirectorySpecs[spec].value
+ elif isinstance(spec, _BaseDirectorySpecs):
+ spec = spec.value
+ self._spec = spec
+ self._env = env
+ self._xdg_subdir = PurePath(xdg_subdir)
+
+ def search(self, name: str) -> Iterator[Path]:
+ for search_path in itertools.chain(
+ self._spec.iter_systemd(self._env),
+ self._spec.iter_xdg(self._env, self._xdg_subdir),
+ ):
+ path = search_path / name
+ if path.exists():
+ yield path
+
+ def storage_path(self) -> Path:
+ for path in self._spec.iter_systemd(self._env):
+ try:
+ mode = path.stat().st_mode
+ except OSError:
+ continue
+ if (mode & self._STORE_MODE) == self._STORE_MODE:
+ return path
+ path = self._spec.xdg_home(self._env, self._xdg_subdir)
+ path.mkdir(parents=True, exist_ok=True)
+ return path
+
+
def is_hex(s: str, *length_args: int) -> bool:
"""Indicate whether a string is a hexadecimal number
diff --git a/sdk/python/tests/test_util.py b/sdk/python/tests/test_util.py
index 75d4a89e30..f3f01addd9 100644
--- a/sdk/python/tests/test_util.py
+++ b/sdk/python/tests/test_util.py
@@ -4,10 +4,13 @@
import itertools
import os
-import parameterized
+import stat
import subprocess
import unittest
+import parameterized
+import pytest
+from pathlib import Path
from unittest import mock
import arvados
@@ -216,3 +219,141 @@ class KeysetListAllTestCase(unittest.TestCase):
self.assertTrue(len(calls) >= 2, "list_func() not called enough to exhaust items")
for args, kwargs in calls:
self.assertEqual(set(kwargs.get('select', ())), expect_select)
+
+
+class TestBaseDirectories:
+ SELF_PATH = Path(__file__)
+
+ @pytest.fixture
+ def dir_spec(self, tmp_path):
+ return arvados.util._BaseDirectorySpec(
+ 'TEST_DIRECTORY',
+ 'XDG_TEST_HOME',
+ Path('.test'),
+ 'XDG_TEST_DIRS',
+ f"{tmp_path / '.test1'}:{tmp_path / '.test2'}",
+ )
+
+ @pytest.fixture
+ def env(self, tmp_path):
+ return {'HOME': str(tmp_path)}
+
+ def test_search_systemd_dirs(self, dir_spec, env, tmp_path):
+ env['TEST_DIRECTORY'] = f'{tmp_path}:{self.SELF_PATH.parent}'
+ dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+ actual = list(dirs.search(self.SELF_PATH.name))
+ assert actual == [self.SELF_PATH]
+
+ def test_search_xdg_home(self, dir_spec, env, tmp_path):
+ env['XDG_TEST_HOME'] = str(self.SELF_PATH.parent.parent)
+ dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+ actual = list(dirs.search(self.SELF_PATH.name))
+ assert actual == [self.SELF_PATH]
+
+ def test_search_xdg_dirs(self, dir_spec, env, tmp_path):
+ env['XDG_TEST_DIRS'] = f'{tmp_path}:{self.SELF_PATH.parent.parent}'
+ dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+ actual = list(dirs.search(self.SELF_PATH.name))
+ assert actual == [self.SELF_PATH]
+
+ def test_search_all_dirs(self, dir_spec, env, tmp_path):
+ env['TEST_DIRECTORY'] = f'{tmp_path}:{self.SELF_PATH.parent}'
+ env['XDG_TEST_HOME'] = str(self.SELF_PATH.parent.parent)
+ env['XDG_TEST_DIRS'] = f'{tmp_path}:{self.SELF_PATH.parent.parent}'
+ dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+ actual = list(dirs.search(self.SELF_PATH.name))
+ assert actual == [self.SELF_PATH, self.SELF_PATH, self.SELF_PATH]
+
+ def test_search_default_home(self, dir_spec, env, tmp_path):
+ expected = tmp_path / dir_spec.xdg_home_default / 'default_home'
+ expected.parent.mkdir()
+ expected.touch()
+ dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+ actual = list(dirs.search(expected.name))
+ assert actual == [expected]
+
+ def test_search_default_dirs(self, dir_spec, env, tmp_path):
+ _, _, default_dir = dir_spec.xdg_dirs_default.rpartition(':')
+ expected = Path(default_dir, 'default_dirs')
+ expected.parent.mkdir()
+ expected.touch()
+ dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+ actual = list(dirs.search(expected.name))
+ assert actual == [expected]
+
+ def test_search_no_default_dirs(self, dir_spec, env, tmp_path):
+ dir_spec.xdg_dirs_key = None
+ dir_spec.xdg_dirs_default = None
+ for subdir in ['.test1', '.test2', dir_spec.xdg_home_default]:
+ expected = tmp_path / subdir / 'no_dirs'
+ expected.parent.mkdir()
+ expected.touch()
+ dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+ actual = list(dirs.search(expected.name))
+ assert actual == [expected]
+
+ def test_ignore_relative_directories(self, dir_spec, env, tmp_path):
+ test_path = Path(*self.SELF_PATH.parts[-2:])
+ assert test_path.exists(), "test setup problem: need an existing file in a subdirectory of ."
+ parent_path = str(test_path.parent)
+ env['TEST_DIRECTORY'] = '.'
+ env['XDG_TEST_HOME'] = parent_path
+ env['XDG_TEST_DIRS'] = parent_path
+ dirs = arvados.util._BaseDirectories(dir_spec, env, parent_path)
+ assert not list(dirs.search(test_path.name))
+
+ def test_storage_path_systemd(self, dir_spec, env, tmp_path):
+ expected = tmp_path / 'rwsystemd'
+ expected.mkdir(0o700)
+ env['TEST_DIRECTORY'] = str(expected)
+ dirs = arvados.util._BaseDirectories(dir_spec, env)
+ assert dirs.storage_path() == expected
+
+ def test_storage_path_systemd_mixed_modes(self, dir_spec, env, tmp_path):
+ rodir = tmp_path / 'rodir'
+ rodir.mkdir(0o500)
+ expected = tmp_path / 'rwdir'
+ expected.mkdir(0o700)
+ env['TEST_DIRECTORY'] = f'{rodir}:{expected}'
+ dirs = arvados.util._BaseDirectories(dir_spec, env)
+ assert dirs.storage_path() == expected
+
+ def test_storage_path_xdg_home(self, dir_spec, env, tmp_path):
+ expected = tmp_path / '.xdghome' / 'arvados'
+ env['XDG_TEST_HOME'] = str(expected.parent)
+ dirs = arvados.util._BaseDirectories(dir_spec, env)
+ assert dirs.storage_path() == expected
+ exp_mode = stat.S_IFDIR | stat.S_IWUSR
+ assert (expected.stat().st_mode & exp_mode) == exp_mode
+
+ def test_storage_path_default(self, dir_spec, env, tmp_path):
+ expected = tmp_path / dir_spec.xdg_home_default / 'arvados'
+ dirs = arvados.util._BaseDirectories(dir_spec, env)
+ assert dirs.storage_path() == expected
+ exp_mode = stat.S_IFDIR | stat.S_IWUSR
+ assert (expected.stat().st_mode & exp_mode) == exp_mode
+
+ def test_empty_xdg_home(self, dir_spec, env, tmp_path):
+ env['XDG_TEST_HOME'] = ''
+ expected = tmp_path / dir_spec.xdg_home_default / 'emptyhome'
+ dirs = arvados.util._BaseDirectories(dir_spec, env, expected.name)
+ assert dirs.storage_path() == expected
+
+ def test_empty_xdg_dirs(self, dir_spec, env, tmp_path):
+ env['XDG_TEST_DIRS'] = ''
+ _, _, default_dir = dir_spec.xdg_dirs_default.rpartition(':')
+ expected = Path(default_dir, 'empty_dirs')
+ expected.parent.mkdir()
+ expected.touch()
+ dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+ actual = list(dirs.search(expected.name))
+ assert actual == [expected]
+
+ def test_spec_key_lookup(self):
+ dirs = arvados.util._BaseDirectories('CACHE')
+ assert dirs._spec.systemd_key == 'CACHE_DIRECTORY'
+ assert dirs._spec.xdg_dirs_key is None
+
+ def test_spec_enum_lookup(self):
+ dirs = arvados.util._BaseDirectories(arvados.util._BaseDirectorySpecs.CONFIG)
+ assert dirs._spec.systemd_key == 'CONFIGURATION_DIRECTORY'
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list