[ARVADOS] created: 01d5f03da039d8ad11a3323906e2416be120cb89

git at public.curoverse.com git at public.curoverse.com
Tue May 6 14:21:24 EDT 2014


        at  01d5f03da039d8ad11a3323906e2416be120cb89 (commit)


commit 01d5f03da039d8ad11a3323906e2416be120cb89
Author: Brett Smith <brett at curoverse.com>
Date:   Tue May 6 14:21:34 2014 -0400

    workbench: File downloads include reader tokens.
    
    This makes it possible to download an entire collection by pointing
    `wget -r` at a collection page with a reader_tokens parameter.
    
    wget respects robots.txt, so this required a finer-grained approach to
    shooing away robots.  We use a meta tag to mark everything as NOINDEX,
    and implement NOFOLLOW on everything except Collections pages.

diff --git a/apps/workbench/app/views/collections/_show_files.html.erb b/apps/workbench/app/views/collections/_show_files.html.erb
index 4b63162..6935789 100644
--- a/apps/workbench/app/views/collections/_show_files.html.erb
+++ b/apps/workbench/app/views/collections/_show_files.html.erb
@@ -34,6 +34,8 @@
       <th>d/l</th>
     </tr>
   </thead><tbody>
+    <% download_params = {controller: 'collections', action: 'show_file', disposition: 'attachment'} -%>
+    <% download_params[:reader_tokens] = Thread.current[:reader_tokens] if Thread.current[:reader_tokens].andand.any? -%>
     <% if @object then @object.files.sort_by{|f|[f[0],f[1]]}.each do |file| %>
       <% file_path = CollectionsHelper::file_path file %>
       <tr>
@@ -66,7 +68,7 @@
 
         <td>
           <div style="display:inline-block">
-            <%= link_to raw('<i class="glyphicon glyphicon-download-alt"></i>'), {controller: 'collections', action: 'show_file', uuid: @object.uuid, file: file_path, size: file[2], disposition: 'attachment'}, {class: 'btn btn-info btn-sm', title: 'Download'} %>
+            <%= link_to raw('<i class="glyphicon glyphicon-download-alt"></i>'), download_params.merge({uuid: @object.uuid, file: file_path, size: file[2]}), {class: 'btn btn-info btn-sm', title: 'Download'} %>
           </div>
         </td>
       </tr>
diff --git a/apps/workbench/app/views/layouts/application.html.erb b/apps/workbench/app/views/layouts/application.html.erb
index 2b5ec88..ce35600 100644
--- a/apps/workbench/app/views/layouts/application.html.erb
+++ b/apps/workbench/app/views/layouts/application.html.erb
@@ -14,6 +14,7 @@
   <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
   <meta name="description" content="">
   <meta name="author" content="">
+  <meta name="robots" content="<%= (controller.class == CollectionsController) ? 'NOINDEX' : 'NOINDEX, NOFOLLOW' %>">
   <%= stylesheet_link_tag    "application", :media => "all" %>
   <%= javascript_include_tag "application" %>
   <%= csrf_meta_tags %>
diff --git a/apps/workbench/public/robots.txt b/apps/workbench/public/robots.txt
index c6742d8..e69de29 100644
--- a/apps/workbench/public/robots.txt
+++ b/apps/workbench/public/robots.txt
@@ -1,2 +0,0 @@
-User-Agent: *
-Disallow: /
diff --git a/apps/workbench/test/integration/collections_test.rb b/apps/workbench/test/integration/collections_test.rb
index bd426f7..c0dcc96 100644
--- a/apps/workbench/test/integration/collections_test.rb
+++ b/apps/workbench/test/integration/collections_test.rb
@@ -3,7 +3,6 @@ require 'selenium-webdriver'
 require 'headless'
 
 class CollectionsTest < ActionDispatch::IntegrationTest
-
   def change_persist oldstate, newstate
     find "div[data-persistent-state='#{oldstate}']"
     page.assert_no_selector "div[data-persistent-state='#{newstate}']"
@@ -39,4 +38,25 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     change_persist 'persistent', 'cache'
   end
 
+  test "can download an entire collection with a reader token" do
+    uuid = api_fixture('collections')['foo_file']['uuid']
+    token = api_fixture('api_client_authorizations')['active']['api_token']
+    q_string = URI.encode_www_form('reader_tokens[]' => token)
+    visit "/collections/#{uuid}?#{q_string}"
+    # It seems that Capybara can't inspect tags outside the body, so this is
+    # a very blunt approach.
+    assert_no_match(/\bnofollow\b/i, page.html,
+                    "wget prohibited from recursing the collection page")
+    # TODO: When we can test against a Keep server, actually click the link
+    # and check the contents, rather than testing the href directly
+    # (this is too closely tied to implementation details).
+    link = nil
+    assert_nothing_raised("failed to list foo files with reader token") do
+      link = find_link('Download')
+    end
+    assert_match(%r{^/collections/#{Regexp.escape uuid}/foo}, link[:href],
+                 "download link doesn't point to foo file")
+    assert_match(/\b#{Regexp.escape q_string}\b/, link[:href],
+                 "collection file download link did not inherit reader tokens")
+  end
 end

commit 5c8db6d44031df0a97b66bec28dd4acc7431a969
Author: Brett Smith <brett at curoverse.com>
Date:   Tue May 6 11:30:58 2014 -0400

    workbench: Make Keep file EPERM test stricter.
    
    Refs #1904.  Tom said that we want the security benefits of a 404
    result, so make sure that's exactly what we get.

diff --git a/apps/workbench/test/functional/collections_controller_test.rb b/apps/workbench/test/functional/collections_controller_test.rb
index d1a8de2..24a2caf 100644
--- a/apps/workbench/test/functional/collections_controller_test.rb
+++ b/apps/workbench/test/functional/collections_controller_test.rb
@@ -88,7 +88,7 @@ class CollectionsControllerTest < ActionController::TestCase
     params = collection_params(:foo_file, 'foo')
     sess = session_for(:spectator)
     get(:show_file, params, sess)
-    assert_includes([403, 404], @response.code.to_i)
+    assert_response 404
   end
 
   test "trying to get a nonexistent file from Keep returns a 404" do

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list