[ARVADOS] created: 7b93dc26eb285261a6a431daa511edfb7392a3a1
Git user
git at public.curoverse.com
Wed Aug 9 19:16:40 EDT 2017
at 7b93dc26eb285261a6a431daa511edfb7392a3a1 (commit)
commit 7b93dc26eb285261a6a431daa511edfb7392a3a1
Author: Lucas Di Pentima <lucas at curoverse.com>
Date: Wed Aug 9 20:14:07 2017 -0300
8937: Added cache validation method to arv-put. For now it only
checks that the first signed block's locator is valid.
Added integration test.
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>
diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index afd9bdc..3a66929 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -796,8 +796,16 @@ class ArvPutUploadJob(object):
arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
cache_filename)
if self.resume and os.path.exists(cache_filepath):
- self.logger.info("Resuming upload from cache file {}".format(cache_filepath))
- self._cache_file = open(cache_filepath, 'a+')
+ if self._cache_is_valid(cache_filepath):
+ self.logger.info(
+ "Resuming upload from cache file {}".format(
+ cache_filepath))
+ self._cache_file = open(cache_filepath, 'a+')
+ else:
+ self.logger.info(
+ "Cache file {} is not valid, starting from scratch".format(
+ cache_filepath))
+ self._cache_file = open(cache_filepath, 'w+')
else:
# --no-resume means start with an empty cache file.
self.logger.info("Creating new cache file at {}".format(cache_filepath))
@@ -823,6 +831,23 @@ class ArvPutUploadJob(object):
# Load the previous manifest so we can check if files were modified remotely.
self._local_collection = arvados.collection.Collection(self._state['manifest'], replication_desired=self.replication_desired, put_threads=self.put_threads)
+ def _cache_is_valid(self, filepath):  # Decide whether the resume cache file at `filepath` can be reused; returns False if any step fails.
+ try:
+ with open(filepath, 'r') as cache_file:
+ manifest = json.load(cache_file)['manifest']  # The cache file is read as JSON; only its 'manifest' entry is inspected.
+ kc = arvados.keep.KeepClient(api_client=api_client)  # NOTE(review): api_client is not defined in this hunk -- presumably a module-level name; confirm.
+ # Check only the first signed locator found in the manifest: its token is the oldest one
+ for line in manifest.split('\n'):
+ match = arvados.util.signed_locator_pattern.search(line)
+ if match is not None:
+ loc = match.group(0)  # Text of the matched signed locator.
+ return kc.head(loc, num_retries=self.num_retries)  # The result of this one head() call is the verdict; later locators are never examined.
+ # No line contained a signed locator, so there is no token to check: treat the cache as valid.
+ return True
+ except Exception as e:  # Anything raised inside the try block (open, json.load, the 'manifest' lookup, the KeepClient calls) lands here.
+ self.logger.info("Something wrong happened when checking cache file: {}".format(e))
+ return False
+
def collection_file_paths(self, col, path_prefix='.'):
"""Return a list of file paths by recursively going through the entire collection `col`"""
file_paths = []
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index ce66181..9310318 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -9,21 +9,22 @@ standard_library.install_aliases()
from builtins import str
from builtins import range
import apiclient
+import hashlib
+import json
import mock
import os
import pwd
+import random
import re
import shutil
import subprocess
import sys
import tempfile
+import threading
import time
import unittest
-import yaml
-import threading
-import hashlib
-import random
import uuid
+import yaml
import arvados
import arvados.commands.put as arv_put
@@ -840,6 +841,43 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
self.assertEqual(1, len(collection_list))
return collection_list[0]
+ def test_invalid_token_invalidates_cache(self):  # A second arv-put run must report the cache as not valid once the cached manifest's token text has been replaced.
+ self.authorize_with('active')
+ tmpdir = self.make_tmpdir()
+ with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
+ f.write('foo')
+ # First run: upload the directory and recover the cache file path from the log on stderr
+ p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
+ self.assertEqual(p.returncode, 0)
+ cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
+ err.decode()).groups()[0]  # Captures the rest of the log line after the fixed message prefix.
+ self.assertTrue(os.path.isfile(cache_filepath))
+ # Load the cache file contents and rewrite every '+A...@' run in the
+ # manifest with a made-up value to simulate an invalid access token
+ with open(cache_filepath, 'r') as c:
+ cache = json.load(c)
+ self.assertRegex(cache['manifest'], r'\+A\S+\@')  # Guard: the manifest must contain a '+A...@' run, otherwise the substitution below would change nothing.
+ cache['manifest'] = re.sub(r'\+A\S+\@',
+ '+Athistokendoesnotwork@',
+ cache['manifest'])
+ with open(cache_filepath, 'w') as c:
+ c.write(json.dumps(cache))
+ # Second run: same command on the same directory; expect the invalid-cache log message
+ p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (out, err) = p.communicate()
+ self.assertRegex(
+ err.decode(),
+ r'INFO: Cache file (.*) is not valid, starting from scratch')
+ self.assertEqual(p.returncode, 0)  # The run must still exit with status 0 after the cache is reported as not valid.
+
def test_put_collection_with_later_update(self):
tmpdir = self.make_tmpdir()
with open(os.path.join(tmpdir, 'file1'), 'w') as f:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list