[arvados] created: 2.7.0-6584-g01608f9f85

git repository hosting git at public.arvados.org
Thu May 16 20:12:12 UTC 2024


        at  01608f9f85be384436af955b782b517f92e528f2 (commit)


commit 01608f9f85be384436af955b782b517f92e528f2
Author: Brett Smith <brett.smith at curii.com>
Date:   Thu May 16 16:04:39 2024 -0400

    21020: Introduce BaseDirectory classes to arvados.util
    
    This is common functionality that will be used throughout the Python
    tools to support base directory searching.
    
    Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>

diff --git a/sdk/python/arvados/util.py b/sdk/python/arvados/util.py
index 050c67f68d..29621b274f 100644
--- a/sdk/python/arvados/util.py
+++ b/sdk/python/arvados/util.py
@@ -7,25 +7,32 @@ This module provides functions and constants that are useful across a variety
 of Arvados resource types, or extend the Arvados API client (see `arvados.api`).
 """
 
+import dataclasses
+import enum
 import errno
 import fcntl
 import functools
 import hashlib
 import httplib2
+import itertools
 import os
 import random
 import re
+import stat
 import subprocess
 import sys
 import warnings
 
 import arvados.errors
 
+from pathlib import Path, PurePath
 from typing import (
     Any,
     Callable,
     Dict,
     Iterator,
+    Mapping,
+    Optional,
     TypeVar,
     Union,
 )
@@ -126,6 +133,122 @@ def _deprecated(version=None, preferred=None):
         return deprecated_wrapper
     return deprecated_decorator
 
+@dataclasses.dataclass
+class _BaseDirectorySpec:
+    """Parse base directories
+
+    A _BaseDirectorySpec defines all the environment variable keys and defaults
+    related to a set of base directories (cache, config, state, etc.). It
+    provides pure methods to parse environment settings into valid paths.
+    """
+    systemd_key: str
+    xdg_home_key: str
+    xdg_home_default: PurePath
+    xdg_dirs_key: Optional[str] = None
+    xdg_dirs_default: str = ''
+
+    @staticmethod
+    def _abspath_from_env(env: Mapping[str, str], key: str) -> Optional[Path]:
+        try:
+            path = Path(env[key])
+        except (KeyError, ValueError):
+            ok = False
+        else:
+            ok = path.is_absolute()
+        return path if ok else None
+
+    @staticmethod
+    def _iter_abspaths(value: str) -> Iterator[Path]:
+        for path_s in value.split(':'):
+            path = Path(path_s)
+            if path.is_absolute():
+                yield path
+
+    def iter_systemd(self, env: Mapping[str, str]) -> Iterator[Path]:
+        return self._iter_abspaths(env.get(self.systemd_key, ''))
+
+    def iter_xdg(self, env: Mapping[str, str], subdir: PurePath) -> Iterator[Path]:
+        yield self.xdg_home(env, subdir)
+        if self.xdg_dirs_key is not None:
+            for path in self._iter_abspaths(env.get(self.xdg_dirs_key) or self.xdg_dirs_default):
+                yield path / subdir
+
+    def xdg_home(self, env: Mapping[str, str], subdir: PurePath) -> Path:
+        home_path = self._abspath_from_env(env, self.xdg_home_key)
+        if home_path is None:
+            home_path = self._abspath_from_env(env, 'HOME') or Path.home()
+            home_path /= self.xdg_home_default
+        return home_path / subdir
+
+
+class _BaseDirectorySpecs(enum.Enum):
+    """Base directory specifications
+
+    This enum provides easy access to the standard base directory settings.
+    """
+    CACHE = _BaseDirectorySpec(
+        'CACHE_DIRECTORY',
+        'XDG_CACHE_HOME',
+        PurePath('.cache'),
+    )
+    CONFIG = _BaseDirectorySpec(
+        'CONFIG_DIRECTORY',
+        'XDG_CONFIG_HOME',
+        PurePath('.config'),
+        'XDG_CONFIG_DIRS',
+        '/etc/xdg',
+    )
+    STATE = _BaseDirectorySpec(
+        'STATE_DIRECTORY',
+        'XDG_STATE_HOME',
+        PurePath('.local', 'state'),
+    )
+
+
+class _BaseDirectories:
+    """Resolve paths from a base directory spec
+
+    Given a _BaseDirectorySpec, this class provides stateful methods to find
+    existing files and return the most-preferred directory for writing.
+    """
+    _STORE_MODE = stat.S_IFDIR | stat.S_IWUSR
+
+    def __init__(
+            self,
+            spec: Union[_BaseDirectorySpec, _BaseDirectorySpecs, str],
+            env: Mapping[str, str]=os.environ,
+            xdg_subdir: Union[os.PathLike, str]='arvados',
+    ) -> None:
+        if isinstance(spec, str):
+            spec = _BaseDirectorySpecs[spec].value
+        elif isinstance(spec, _BaseDirectorySpecs):
+            spec = spec.value
+        self._spec = spec
+        self._env = env
+        self._xdg_subdir = PurePath(xdg_subdir)
+
+    def search(self, name: str) -> Iterator[Path]:
+        for search_path in itertools.chain(
+                self._spec.iter_systemd(self._env),
+                self._spec.iter_xdg(self._env, self._xdg_subdir),
+        ):
+            path = search_path / name
+            if path.exists():
+                yield path
+
+    def storage_path(self) -> Path:
+        for path in self._spec.iter_systemd(self._env):
+            try:
+                mode = path.stat().st_mode
+            except OSError:
+                continue
+            if (mode & self._STORE_MODE) == self._STORE_MODE:
+                return path
+        path = self._spec.xdg_home(self._env, self._xdg_subdir)
+        path.mkdir(parents=True, exist_ok=True)
+        return path
+
+
 def is_hex(s: str, *length_args: int) -> bool:
     """Indicate whether a string is a hexadecimal number
 
diff --git a/sdk/python/tests/test_util.py b/sdk/python/tests/test_util.py
index 75d4a89e30..17df9805a3 100644
--- a/sdk/python/tests/test_util.py
+++ b/sdk/python/tests/test_util.py
@@ -4,10 +4,13 @@
 
 import itertools
 import os
-import parameterized
+import stat
 import subprocess
 import unittest
 
+import parameterized
+import pytest
+from pathlib import Path
 from unittest import mock
 
 import arvados
@@ -216,3 +219,141 @@ class KeysetListAllTestCase(unittest.TestCase):
         self.assertTrue(len(calls) >= 2, "list_func() not called enough to exhaust items")
         for args, kwargs in calls:
             self.assertEqual(set(kwargs.get('select', ())), expect_select)
+
+
+class TestBaseDirectories:
+    SELF_PATH = Path(__file__)
+
+    @pytest.fixture
+    def dir_spec(self, tmp_path):
+        return arvados.util._BaseDirectorySpec(
+            'TEST_DIRECTORY',
+            'XDG_TEST_HOME',
+            Path('.test'),
+            'XDG_TEST_DIRS',
+            f"{tmp_path / '.test1'}:{tmp_path / '.test2'}",
+        )
+
+    @pytest.fixture
+    def env(self, tmp_path):
+        return {'HOME': str(tmp_path)}
+
+    def test_search_systemd_dirs(self, dir_spec, env, tmp_path):
+        env['TEST_DIRECTORY'] = f'{tmp_path}:{self.SELF_PATH.parent}'
+        dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+        actual = list(dirs.search(self.SELF_PATH.name))
+        assert actual == [self.SELF_PATH]
+
+    def test_search_xdg_home(self, dir_spec, env, tmp_path):
+        env['XDG_TEST_HOME'] = str(self.SELF_PATH.parent.parent)
+        dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+        actual = list(dirs.search(self.SELF_PATH.name))
+        assert actual == [self.SELF_PATH]
+
+    def test_search_xdg_dirs(self, dir_spec, env, tmp_path):
+        env['XDG_TEST_DIRS'] = f'{tmp_path}:{self.SELF_PATH.parent.parent}'
+        dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+        actual = list(dirs.search(self.SELF_PATH.name))
+        assert actual == [self.SELF_PATH]
+        
+    def test_search_all_dirs(self, dir_spec, env, tmp_path):
+        env['TEST_DIRECTORY'] = f'{tmp_path}:{self.SELF_PATH.parent}'
+        env['XDG_TEST_HOME'] = str(self.SELF_PATH.parent.parent)
+        env['XDG_TEST_DIRS'] = f'{tmp_path}:{self.SELF_PATH.parent.parent}'
+        dirs = arvados.util._BaseDirectories(dir_spec, env, 'tests')
+        actual = list(dirs.search(self.SELF_PATH.name))
+        assert actual == [self.SELF_PATH, self.SELF_PATH, self.SELF_PATH]
+        
+    def test_search_default_home(self, dir_spec, env, tmp_path):
+        expected = tmp_path / dir_spec.xdg_home_default / 'default_home'
+        expected.parent.mkdir()
+        expected.touch()
+        dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+        actual = list(dirs.search(expected.name))
+        assert actual == [expected]
+        
+    def test_search_default_dirs(self, dir_spec, env, tmp_path):
+        _, _, default_dir = dir_spec.xdg_dirs_default.rpartition(':')
+        expected = Path(default_dir, 'default_dirs')
+        expected.parent.mkdir()
+        expected.touch()
+        dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+        actual = list(dirs.search(expected.name))
+        assert actual == [expected]
+
+    def test_search_no_default_dirs(self, dir_spec, env, tmp_path):
+        dir_spec.xdg_dirs_key = None
+        dir_spec.xdg_dirs_default = None
+        for subdir in ['.test1', '.test2', dir_spec.xdg_home_default]:
+            expected = tmp_path / subdir / 'no_dirs'
+            expected.parent.mkdir()
+            expected.touch()
+        dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+        actual = list(dirs.search(expected.name))
+        assert actual == [expected]
+
+    def test_ignore_relative_directories(self, dir_spec, env, tmp_path):
+        test_path = Path(*self.SELF_PATH.parts[-2:])
+        assert test_path.exists(), "test setup problem: need an existing file in a subdirectory of ."
+        parent_path = str(test_path.parent)
+        env['TEST_DIRECTORY'] = '.'
+        env['XDG_TEST_HOME'] = parent_path
+        env['XDG_TEST_DIRS'] = parent_path
+        dirs = arvados.util._BaseDirectories(dir_spec, env, parent_path)
+        assert not list(dirs.search(test_path.name))
+
+    def test_store_path_systemd(self, dir_spec, env, tmp_path):
+        expected = tmp_path / 'rwsystemd'
+        expected.mkdir(0o700)
+        env['TEST_DIRECTORY'] = str(expected)
+        dirs = arvados.util._BaseDirectories(dir_spec, env)
+        assert dirs.storage_path() == expected
+
+    def test_store_path_systemd_mixed_modes(self, dir_spec, env, tmp_path):
+        rodir = tmp_path / 'rodir'
+        rodir.mkdir(0o500)
+        expected = tmp_path / 'rwdir'
+        expected.mkdir(0o700)
+        env['TEST_DIRECTORY'] = f'{rodir}:{expected}'
+        dirs = arvados.util._BaseDirectories(dir_spec, env)
+        assert dirs.storage_path() == expected
+        
+    def test_store_path_xdg_home(self, dir_spec, env, tmp_path):
+        expected = tmp_path / '.xdghome' / 'arvados'
+        env['XDG_TEST_HOME'] = str(expected.parent)
+        dirs = arvados.util._BaseDirectories(dir_spec, env)
+        assert dirs.storage_path() == expected
+        exp_mode = stat.S_IFDIR | stat.S_IWUSR
+        assert (expected.stat().st_mode & exp_mode) == exp_mode
+        
+    def test_store_path_default(self, dir_spec, env, tmp_path):
+        expected = tmp_path / dir_spec.xdg_home_default / 'arvados'
+        dirs = arvados.util._BaseDirectories(dir_spec, env)
+        assert dirs.storage_path() == expected
+        exp_mode = stat.S_IFDIR | stat.S_IWUSR
+        assert (expected.stat().st_mode & exp_mode) == exp_mode
+
+    def test_empty_xdg_home(self, dir_spec, env, tmp_path):
+        env['XDG_TEST_HOME'] = ''
+        expected = tmp_path / dir_spec.xdg_home_default / 'emptyhome'
+        dirs = arvados.util._BaseDirectories(dir_spec, env, expected.name)
+        assert dirs.storage_path() == expected
+
+    def test_empty_xdg_dirs(self, dir_spec, env, tmp_path):
+        env['XDG_TEST_DIRS'] = ''
+        _, _, default_dir = dir_spec.xdg_dirs_default.rpartition(':')
+        expected = Path(default_dir, 'empty_dirs')
+        expected.parent.mkdir()
+        expected.touch()
+        dirs = arvados.util._BaseDirectories(dir_spec, env, '.')
+        actual = list(dirs.search(expected.name))
+        assert actual == [expected]
+
+    def test_spec_key_lookup(self):
+        dirs = arvados.util._BaseDirectories('CACHE')
+        assert dirs._spec.systemd_key == 'CACHE_DIRECTORY'
+        assert dirs._spec.xdg_dirs_key is None
+        
+    def test_spec_enum_lookup(self):
+        dirs = arvados.util._BaseDirectories(arvados.util._BaseDirectorySpecs.CONFIG)
+        assert dirs._spec.systemd_key == 'CONFIG_DIRECTORY'

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list