[ARVADOS] created: 7b93dc26eb285261a6a431daa511edfb7392a3a1

Git user git at public.curoverse.com
Wed Aug 9 19:16:40 EDT 2017


        at  7b93dc26eb285261a6a431daa511edfb7392a3a1 (commit)


commit 7b93dc26eb285261a6a431daa511edfb7392a3a1
Author: Lucas Di Pentima <lucas at curoverse.com>
Date:   Wed Aug 9 20:14:07 2017 -0300

    8937: Added a cache validation method to arv-put. For now it only
    checks that the locator of the first (oldest) signed block is still
    valid. Added an integration test.
    
    Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas at curoverse.com>
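
The check hinges on the permission signature that Keep appends to block
locators in a signed manifest. A minimal standalone sketch of the idea,
using a made-up manifest line and the same SDK calls as the diff below
(arvados.util.signed_locator_pattern, KeepClient.head); it assumes
ARVADOS_API_HOST/ARVADOS_API_TOKEN are set and is only an illustration,
not the actual arv-put code path:

    import arvados
    import arvados.keep
    import arvados.util

    # Made-up one-line manifest containing a signed block locator; arv-put
    # reads the real text from the "manifest" field of its cache file.
    cached_manifest = (". acbd18db4cc2f85cedef654fccc4a4d8+3"
                       "+A0123456789abcdef0123456789abcdef01234567@59a56a4c"
                       " 0:3:somefile.txt\n")

    def first_signed_locator(manifest_text):
        # Signed locators look like "<md5>+<size>+A<signature>@<expiry>".
        for line in manifest_text.split('\n'):
            match = arvados.util.signed_locator_pattern.search(line)
            if match is not None:
                return match.group(0)
        return None

    loc = first_signed_locator(cached_manifest)
    if loc is not None:
        kc = arvados.keep.KeepClient(api_client=arvados.api('v1'))
        # HEAD succeeds only while the signature is still valid; an expired
        # or bogus signature raises an exception, which the real method
        # catches and treats as "cache not valid".
        cache_ok = kc.head(loc)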

diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py
index afd9bdc..3a66929 100644
--- a/sdk/python/arvados/commands/put.py
+++ b/sdk/python/arvados/commands/put.py
@@ -796,8 +796,16 @@ class ArvPutUploadJob(object):
                 arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
                 cache_filename)
             if self.resume and os.path.exists(cache_filepath):
-                self.logger.info("Resuming upload from cache file {}".format(cache_filepath))
-                self._cache_file = open(cache_filepath, 'a+')
+                if self._cache_is_valid(cache_filepath):
+                    self.logger.info(
+                        "Resuming upload from cache file {}".format(
+                            cache_filepath))
+                    self._cache_file = open(cache_filepath, 'a+')
+                else:
+                    self.logger.info(
+                        "Cache file {} is not valid, starting from scratch".format(
+                            cache_filepath))
+                    self._cache_file = open(cache_filepath, 'w+')
             else:
                 # --no-resume means start with an empty cache file.
                 self.logger.info("Creating new cache file at {}".format(cache_filepath))
@@ -823,6 +831,23 @@ class ArvPutUploadJob(object):
             # Load the previous manifest so we can check if files were modified remotely.
             self._local_collection = arvados.collection.Collection(self._state['manifest'], replication_desired=self.replication_desired, put_threads=self.put_threads)
 
+    def _cache_is_valid(self, filepath):
+        try:
+            with open(filepath, 'r') as cache_file:
+                manifest = json.load(cache_file)['manifest']
+            kc = arvados.keep.KeepClient(api_client=api_client)
+            # Check that the token of the first (i.e. oldest) signed block is still valid
+            for line in manifest.split('\n'):
+                match = arvados.util.signed_locator_pattern.search(line)
+                if match is not None:
+                    loc = match.group(0)
+                    return kc.head(loc, num_retries=self.num_retries)
+            # No signed locator found, so there is nothing to validate.
+            return True
+        except Exception as e:
+            self.logger.info("Error while checking the cache file: {}".format(e))
+            return False
+
     def collection_file_paths(self, col, path_prefix='.'):
         """Return a list of file paths by recursively go through the entire collection `col`"""
         file_paths = []
diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py
index ce66181..9310318 100644
--- a/sdk/python/tests/test_arv_put.py
+++ b/sdk/python/tests/test_arv_put.py
@@ -9,21 +9,22 @@ standard_library.install_aliases()
 from builtins import str
 from builtins import range
 import apiclient
+import hashlib
+import json
 import mock
 import os
 import pwd
+import random
 import re
 import shutil
 import subprocess
 import sys
 import tempfile
+import threading
 import time
 import unittest
-import yaml
-import threading
-import hashlib
-import random
 import uuid
+import yaml
 
 import arvados
 import arvados.commands.put as arv_put
@@ -840,6 +841,43 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         self.assertEqual(1, len(collection_list))
         return collection_list[0]
 
+    def test_invalid_token_invalidates_cache(self):
+        self.authorize_with('active')
+        tmpdir = self.make_tmpdir()
+        with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
+            f.write('foo')
+        # Upload a directory and get the cache file name
+        p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
+        self.assertEqual(p.returncode, 0)
+        cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
+                                   err.decode()).groups()[0]
+        self.assertTrue(os.path.isfile(cache_filepath))
+        # Load the cache file contents and modify the manifest to simulate
+        # an invalid access token
+        with open(cache_filepath, 'r') as c:
+            cache = json.load(c)
+        self.assertRegex(cache['manifest'], r'\+A\S+\@')
+        cache['manifest'] = re.sub(r'\+A\S+\@',
+                                   '+Athistokendoesnotwork@',
+                                   cache['manifest'])
+        with open(cache_filepath, 'w') as c:
+            c.write(json.dumps(cache))
+        # Re-run the upload and expect to get an invalid cache message
+        p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(
+            err.decode(),
+            r'INFO: Cache file (.*) is not valid, starting from scratch')
+        self.assertEqual(p.returncode, 0)
+
     def test_put_collection_with_later_update(self):
         tmpdir = self.make_tmpdir()
         with open(os.path.join(tmpdir, 'file1'), 'w') as f:

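For context, the token-mangling step in the integration test above boils down
to one regex substitution on the cached manifest text. A standalone sketch
with a made-up manifest line (not taken from a real cache file):

    import re

    # The "+A<signature>@<expiry>" segment is the Keep permission hint.
    line = (". acbd18db4cc2f85cedef654fccc4a4d8+3"
            "+A0123456789abcdef0123456789abcdef01234567@59a56a4c"
            " 0:3:somefile.txt")

    # Overwriting the signature makes Keep reject the locator, which drives
    # arv-put into the "Cache file ... is not valid, starting from scratch"
    # path exercised by the test.
    broken = re.sub(r'\+A\S+\@', '+Athistokendoesnotwork@', line)
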
-----------------------------------------------------------------------


hooks/post-receive
-- 



