[ARVADOS] updated: 0a38db3319886a477c7c204396df2ffa1cfd61d2

git at public.curoverse.com git at public.curoverse.com
Wed Oct 14 04:15:23 EDT 2015


Summary of changes:
 apps/workbench/Gemfile.lock                        | 15 ++++---
 .../app/controllers/collections_controller.rb      |  3 +-
 .../controllers/collections_controller_test.rb     | 33 ++++++++++++++--
 apps/workbench/test/helpers/download_helper.rb     | 21 ++++++++++
 apps/workbench/test/integration/download_test.rb   | 38 ++++++++++++++++++
 apps/workbench/test/integration_helper.rb          | 15 ++++++-
 apps/workbench/test/test_helper.rb                 |  5 +++
 sdk/python/tests/nginx.conf                        | 14 +++++++
 sdk/python/tests/run_test_server.py                | 34 +++++++++++++++-
 .../api/app/controllers/database_controller.rb     |  4 ++
 .../test/fixtures/api_client_authorizations.yml    |  2 +-
 services/keepstore/azure_blob_volume.go            | 30 ++++++++------
 services/keepstore/volume_generic_test.go          | 46 ++++++++++++++++++++++
 services/keepstore/volume_unix.go                  | 17 +++++++-
 14 files changed, 246 insertions(+), 31 deletions(-)
 create mode 100644 apps/workbench/test/helpers/download_helper.rb
 create mode 100644 apps/workbench/test/integration/download_test.rb

  discards  01947441b3d8c90f2d76f1b274e750feb2488951 (commit)
  discards  49adb31e39baf976e7e4919013c5f568aa2c4007 (commit)
  discards  77a27b75347d3dd306eab96a13ed7e232cd5a27d (commit)
  discards  9e7ecf80ed7a1af6826c91f763ffce02b8cb7d98 (commit)
  discards  954e58a68015537c57a1fde78a63f09656a66e35 (commit)
  discards  f895709ee26fab21a1cff34a3f6962e3814597da (commit)
  discards  7029a542ad5ee76eaab0919252789523552bc9cd (commit)
  discards  ffcf6c4cc00a81d0f77c41b3f3b8787d99c9c96e (commit)
  discards  605ae1d5d3ba03ec55115899231c691910da8a28 (commit)
  discards  325f765d8a1d8af1fecab160d436a69d6b499ec9 (commit)
  discards  6e5a24f96c33ecbf5fd1db2c630112485e6ac78a (commit)
  discards  689a24939aaa834d031707b28fd8366aea4220dc (commit)
  discards  df167ba57a1a04ea8af03a492cf36e775fba1090 (commit)
  discards  9e884457b3a5219f1d9f994e99952deed508288f (commit)
  discards  ec2f931644a9495eda253831099641073b2f0c95 (commit)
  discards  a9ef8e3cb0a9d4bfca185af19ed437ff92bec872 (commit)
  discards  9f393c4e51e2aaa871e4918d1517e915b841d59f (commit)
  discards  887c033f920c7142c3959645e9fb5688258625c7 (commit)
  discards  2131e7eb86a1303d03e0a8220170533ac66d261d (commit)
  discards  6d2eaaeac541de343225bcf608c298571d2b5c87 (commit)
  discards  152d7e63f38a6c3c134a711796bf4cee88124dca (commit)
  discards  3a93e0220ea1e502800f9ad624b86afe88d7dbfb (commit)
  discards  3bfb922e9b97901d31c5b6b95c8427bd4ed62e72 (commit)
  discards  f9759447fa1b763add95631316ee8e412c2be890 (commit)
  discards  dfaf207e5aeb8633cd70d400ed82ad51a07fdde9 (commit)
       via  0a38db3319886a477c7c204396df2ffa1cfd61d2 (commit)
       via  448da21288cc106e9095279925dbf691668e3ce0 (commit)
       via  b12996a9d9ad7bf527b40639b078b3cea6640a9d (commit)
       via  600f427244a0242d26e9fe027916e4146034dbf4 (commit)
       via  44a382de4e7b8986c17e5baeae94f8e521d923b6 (commit)
       via  1f04571a18a218c0968c92609a481913ec134171 (commit)
       via  9fa083d19ca4c88c7e2802852b6a89da0575c73e (commit)
       via  35d3f6566426986cc6da4a5ec4c4844cbbd17ce2 (commit)
       via  391fbe89b56b718f674822534c34ff80aa107576 (commit)
       via  796a00b5887121e462f8a82391bac125120c2841 (commit)
       via  a5741dcc18ae0862b220ed08d4db136cfa979ec5 (commit)
       via  eeda48bc31d5cd03d3a72becaaac4d643c9de46d (commit)
       via  5ba24fbe0b0529bbbb5bda2790b61c35ef256469 (commit)
       via  7796b112345b302ff6108ff761ff0d3c871bf888 (commit)
       via  8bb5dcb75f10e8128e2b4b5b741a96e781174997 (commit)
       via  e0aa44d1f555056ba1dc8e866cf6ae50494cb3e2 (commit)
       via  f04287b86a8d4e8b74cf8d894ef7ce5420364f7a (commit)
       via  91062e85e93d0d0ae557a478f1f83b133d4d2080 (commit)
       via  b9d5acacf4ac027867e388040221101b73dbf118 (commit)
       via  8146d4cc8c3d52f548f6af66de26f30881ebec39 (commit)
       via  2a54340956104d689ab52f60d82af5555a103919 (commit)
       via  967632c79df826ab16a0449ff63b0b9f6d35c599 (commit)
       via  d1289b1021929fbbba35027f70a33e05cb594bb3 (commit)
       via  d37ebe600d3984c821386c347f33c59ef3355e03 (commit)
       via  ff7759fbe3aab8f814a41437cd6e97b80fe56d8d (commit)
       via  fc7c9e1ad6345888595205c0978c58a6710d7446 (commit)
       via  e3778ef533f50b0492eef80bb2525a7a09628c32 (commit)
       via  e7d36bd8f4ca443f433c63eac863fec7e61ef121 (commit)
       via  40c9b26a39c773e806e0a1430774f1787820376f (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (01947441b3d8c90f2d76f1b274e750feb2488951)
            \
             N -- N -- N (0a38db3319886a477c7c204396df2ffa1cfd61d2)

When this happens we assume that you've already had alert emails for all
of the O revisions, so here we report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 0a38db3319886a477c7c204396df2ffa1cfd61d2
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Oct 14 04:07:37 2015 -0400

    5824: Update bundle

diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index 20b8d61..8b2118c 100644
--- a/apps/workbench/Gemfile.lock
+++ b/apps/workbench/Gemfile.lock
@@ -74,7 +74,7 @@ GEM
       rack (>= 1.0.0)
       rack-test (>= 0.5.4)
       xpath (~> 2.0)
-    childprocess (0.5.5)
+    childprocess (0.5.6)
       ffi (~> 1.0, >= 1.0.11)
     cliver (0.3.2)
     coffee-rails (4.1.0)
@@ -98,7 +98,7 @@ GEM
     fast_stack (0.1.0)
       rake
       rake-compiler
-    ffi (1.9.6)
+    ffi (1.9.10)
     flamegraph (0.1.0)
       fast_stack
     google-api-client (0.6.4)
@@ -139,7 +139,7 @@ GEM
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.11.1)
+    multi_json (1.11.2)
     multipart-post (1.2.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -192,7 +192,7 @@ GEM
     ref (1.0.5)
     ruby-debug-passenger (0.2.0)
     ruby-prof (0.15.2)
-    rubyzip (1.1.6)
+    rubyzip (1.1.7)
     rvm-capistrano (1.5.5)
       capistrano (~> 2.15.4)
     sass (3.4.9)
@@ -202,7 +202,7 @@ GEM
       sprockets (>= 2.8, < 4.0)
       sprockets-rails (>= 2.0, < 4.0)
       tilt (~> 1.1)
-    selenium-webdriver (2.44.0)
+    selenium-webdriver (2.48.1)
       childprocess (~> 0.5)
       multi_json (~> 1.0)
       rubyzip (~> 1.0)
@@ -239,7 +239,7 @@ GEM
       execjs (>= 0.3.0)
       json (>= 1.8.0)
     uuidtools (2.1.5)
-    websocket (1.2.1)
+    websocket (1.2.2)
     websocket-driver (0.5.1)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.1)
@@ -294,3 +294,6 @@ DEPENDENCIES
   therubyracer
   uglifier (>= 1.0.3)
   wiselinks
+
+BUNDLED WITH
+   1.10.6

commit 448da21288cc106e9095279925dbf691668e3ce0
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Oct 13 10:52:06 2015 -0400

    5824: Use keep-web in Workbench integration tests

diff --git a/apps/workbench/test/helpers/download_helper.rb b/apps/workbench/test/helpers/download_helper.rb
new file mode 100644
index 0000000..21fb4cd
--- /dev/null
+++ b/apps/workbench/test/helpers/download_helper.rb
@@ -0,0 +1,21 @@
+module DownloadHelper
+  module_function
+
+  def path
+    Rails.root.join 'tmp', 'downloads'
+  end
+
+  def clear
+    FileUtils.rm_f path
+    begin
+      Dir.mkdir path
+    rescue Errno::EEXIST
+    end
+  end
+
+  def done
+    Dir[path.join '*'].reject do |f|
+      /\.part$/ =~ f
+    end
+  end
+end
diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb
new file mode 100644
index 0000000..3f8eaf2
--- /dev/null
+++ b/apps/workbench/test/integration/download_test.rb
@@ -0,0 +1,38 @@
+require 'integration_helper'
+require 'helpers/download_helper'
+
+class DownloadTest < ActionDispatch::IntegrationTest
+  setup do
+    portfile = File.expand_path '../../../../../tmp/keep-web-ssl.port', __FILE__
+    @kwport = File.read portfile
+    Rails.configuration.keep_web_url = "https://localhost:#{@kwport}/c=%{uuid_or_pdh}"
+    CollectionsController.any_instance.expects(:file_enumerator).never
+
+    # Make sure Capybara can download files.
+    need_selenium 'for downloading', :selenium_with_download
+    DownloadHelper.clear
+
+    # Keep data isn't populated by fixtures, so we have to write any
+    # data we expect to read.
+    unless /^acbd/ =~ `echo -n foo | arv-put --no-progress --raw -` && $?.success?
+      raise $?.to_s
+    end
+  end
+
+  test "download from keep-web with a reader token" do
+    uuid = api_fixture('collections')['foo_file']['uuid']
+    token = api_fixture('api_client_authorizations')['active_all_collections']['api_token']
+    visit "/collections/download/#{uuid}/#{token}/"
+    within "#collection_files" do
+      click_link "foo"
+    end
+    data = nil
+    tries = 0
+    while tries < 20
+      sleep 0.1
+      tries += 1
+      data = File.read(DownloadHelper.path.join 'foo') rescue nil
+    end
+    assert_equal 'foo', data
+  end
+end
diff --git a/apps/workbench/test/integration_helper.rb b/apps/workbench/test/integration_helper.rb
index 39fdf4b..5750a1b 100644
--- a/apps/workbench/test/integration_helper.rb
+++ b/apps/workbench/test/integration_helper.rb
@@ -19,6 +19,17 @@ Capybara.register_driver :poltergeist_without_file_api do |app|
   Capybara::Poltergeist::Driver.new app, POLTERGEIST_OPTS.merge(extensions: [js])
 end
 
+Capybara.register_driver :selenium_with_download do |app|
+  profile = Selenium::WebDriver::Firefox::Profile.new
+  profile['browser.download.dir'] = DownloadHelper.path.to_s
+  profile['browser.download.downloadDir'] = DownloadHelper.path.to_s
+  profile['browser.download.defaultFolder'] = DownloadHelper.path.to_s
+  profile['browser.download.folderList'] = 2 # "save to user-defined location"
+  profile['browser.download.manager.showWhenStarting'] = false
+  profile['browser.helperApps.alwaysAsk.force'] = false
+  Capybara::Selenium::Driver.new app, profile: profile
+end
+
 module WaitForAjax
   Capybara.default_wait_time = 5
   def wait_for_ajax
@@ -73,8 +84,8 @@ module HeadlessHelper
     end
   end
 
-  def need_selenium reason=nil
-    Capybara.current_driver = :selenium
+  def need_selenium reason=nil, driver=:selenium
+    Capybara.current_driver = driver
     unless ENV['ARVADOS_TEST_HEADFUL'] or @headless
       @headless = HeadlessSingleton.get
       @headless.start
diff --git a/apps/workbench/test/test_helper.rb b/apps/workbench/test/test_helper.rb
index 89d15c6..41592af 100644
--- a/apps/workbench/test/test_helper.rb
+++ b/apps/workbench/test/test_helper.rb
@@ -176,7 +176,10 @@ class ApiServerForTests
       # though it doesn't need to start up a new server).
       env_script = check_output %w(python ./run_test_server.py start --auth admin)
       check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_keep-web)
       check_output %w(python ./run_test_server.py start_nginx)
+      # This one isn't a no-op, even under run-tests.sh.
+      check_output %w(python ./run_test_server.py start_keep)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -192,9 +195,11 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
+      check_output %w(python ./run_test_server.py stop_keep)
       # These are no-ops if we're running within run-tests.sh
       check_output %w(python ./run_test_server.py stop_nginx)
       check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop_keep-web)
       check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
diff --git a/sdk/python/tests/nginx.conf b/sdk/python/tests/nginx.conf
index 6196605..885f84e 100644
--- a/sdk/python/tests/nginx.conf
+++ b/sdk/python/tests/nginx.conf
@@ -28,4 +28,18 @@ http {
       proxy_pass http://keepproxy;
     }
   }
+  upstream keep-web {
+    server localhost:{{KEEPWEBPORT}};
+  }
+  server {
+    listen *:{{KEEPWEBSSLPORT}} ssl default_server;
+    server_name ~^(?<request_host>.*)$;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keep-web;
+      proxy_set_header Host $request_host:{{KEEPWEBPORT}};
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+  }
 }
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index 5d0c42a..b8de60f 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -437,10 +437,35 @@ def stop_arv_git_httpd():
         return
     kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
 
+def run_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_keep_web()
+
+    keepwebport = find_available_port()
+    env = os.environ.copy()
+    env.pop('ARVADOS_API_TOKEN', None)
+    keepweb = subprocess.Popen(
+        ['keep-web',
+         '-attachment-only-host=localhost:'+str(keepwebport),
+         '-address=:'+str(keepwebport)],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('keep-web'), 'w') as f:
+        f.write(str(keepweb.pid))
+    _setport('keep-web', keepwebport)
+    _wait_until_port_listens(keepwebport)
+
+def stop_keep_web():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keep-web'), wait=0)
+
 def run_nginx():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
         return
     nginxconf = {}
+    nginxconf['KEEPWEBPORT'] = _getport('keep-web')
+    nginxconf['KEEPWEBSSLPORT'] = find_available_port()
     nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
     nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
     nginxconf['GITPORT'] = _getport('arv-git-httpd')
@@ -464,6 +489,7 @@ def run_nginx():
          '-g', 'pid '+_pidfile('nginx')+';',
          '-c', conffile],
         env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keep-web-ssl', nginxconf['KEEPWEBSSLPORT'])
     _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
     _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
 
@@ -563,7 +589,8 @@ class TestCaseWithServers(unittest.TestCase):
         for server_kwargs, start_func, stop_func in (
                 (cls.MAIN_SERVER, run, reset),
                 (cls.KEEP_SERVER, run_keep, stop_keep),
-                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy)):
+                (cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
+                (cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
             if server_kwargs is not None:
                 start_func(**server_kwargs)
                 cls._cleanup_funcs.append(stop_func)
@@ -589,6 +616,7 @@ if __name__ == "__main__":
         'start', 'stop',
         'start_keep', 'stop_keep',
         'start_keep_proxy', 'stop_keep_proxy',
+        'start_keep-web', 'stop_keep-web',
         'start_arv-git-httpd', 'stop_arv-git-httpd',
         'start_nginx', 'stop_nginx',
     ]
@@ -625,6 +653,10 @@ if __name__ == "__main__":
         run_arv_git_httpd()
     elif args.action == 'stop_arv-git-httpd':
         stop_arv_git_httpd()
+    elif args.action == 'start_keep-web':
+        run_keep_web()
+    elif args.action == 'stop_keep-web':
+        stop_keep_web()
     elif args.action == 'start_nginx':
         run_nginx()
     elif args.action == 'stop_nginx':
diff --git a/services/api/app/controllers/database_controller.rb b/services/api/app/controllers/database_controller.rb
index 64818da..21c8e47 100644
--- a/services/api/app/controllers/database_controller.rb
+++ b/services/api/app/controllers/database_controller.rb
@@ -29,6 +29,10 @@ class DatabaseController < ApplicationController
     fixturesets = Dir.glob(Rails.root.join('test', 'fixtures', '*.yml')).
       collect { |yml| yml.match(/([^\/]*)\.yml$/)[1] }
 
+    # Don't reset keep_services: clients need to discover our
+    # integration-testing keepstores, not test fixtures.
+    fixturesets -= %w[keep_services]
+
     table_names = '"' + ActiveRecord::Base.connection.tables.join('","') + '"'
 
     attempts_left = 20
diff --git a/services/api/test/fixtures/api_client_authorizations.yml b/services/api/test/fixtures/api_client_authorizations.yml
index 9199d17..ecb9adb 100644
--- a/services/api/test/fixtures/api_client_authorizations.yml
+++ b/services/api/test/fixtures/api_client_authorizations.yml
@@ -87,7 +87,7 @@ active_all_collections:
   user: active
   api_token: activecollectionsabcdefghijklmnopqrstuvwxyz1234567
   expires_at: 2038-01-01 00:00:00
-  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_disks"]
+  scopes: ["GET /arvados/v1/collections/", "GET /arvados/v1/keep_services", "GET /arvados/v1/keep_services/"]
 
 active_userlist:
   api_client: untrusted

commit b12996a9d9ad7bf527b40639b078b3cea6640a9d
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Oct 12 19:15:06 2015 -0400

    5824: Add option to redirect Workbench downloads to a keep-web service

diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb
index e01151c..38b58a1 100644
--- a/apps/workbench/app/controllers/collections_controller.rb
+++ b/apps/workbench/app/controllers/collections_controller.rb
@@ -1,4 +1,6 @@
 require "arvados/keep"
+require "uri"
+require "cgi"
 
 class CollectionsController < ApplicationController
   include ActionController::Live
@@ -130,11 +132,27 @@ class CollectionsController < ApplicationController
     usable_token = find_usable_token(tokens) do
       coll = Collection.find(params[:uuid])
     end
+    if usable_token.nil?
+      # Response already rendered.
+      return
+    end
+
+    if Rails.configuration.keep_web_url
+      opts = {}
+      if usable_token == params[:reader_token]
+        opts[:path_token] = usable_token
+      elsif usable_token == Rails.configuration.anonymous_user_token
+        # Don't pass a token at all
+      else
+        # We pass the current user's real token only if it's necessary
+        # to read the collection.
+        opts[:query_token] = usable_token
+      end
+      return redirect_to keep_web_url(params[:uuid], params[:file], opts)
+    end
 
     file_name = params[:file].andand.sub(/^(\.\/|\/|)/, './')
-    if usable_token.nil?
-      return  # Response already rendered.
-    elsif file_name.nil? or not coll.manifest.has_file?(file_name)
+    if file_name.nil? or not coll.manifest.has_file?(file_name)
       return render_not_found
     end
 
@@ -305,6 +323,21 @@ class CollectionsController < ApplicationController
     return nil
   end
 
+  def keep_web_url(uuid_or_pdh, file, opts)
+    fmt = {uuid_or_pdh: uuid_or_pdh.sub('+', '-')}
+    uri = URI.parse(Rails.configuration.keep_web_url % fmt)
+    uri.path += '/' unless uri.path.end_with? '/'
+    if opts[:path_token]
+      uri.path += 't=' + opts[:path_token] + '/'
+    end
+    uri.path += '_/'
+    uri.path += CGI::escape(file)
+    if opts[:query_token]
+      uri.query = 'api_token=' + CGI::escape(opts[:query_token])
+    end
+    uri.to_s
+  end
+
   # Note: several controller and integration tests rely on stubbing
   # file_enumerator to return fake file content.
   def file_enumerator opts
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 00959bb..5504fd2 100644
--- a/apps/workbench/config/application.default.yml
+++ b/apps/workbench/config/application.default.yml
@@ -225,3 +225,11 @@ common:
   # E.g., using a name-based proxy server to forward connections to shell hosts:
   # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
   shell_in_a_box_url: false
+
+  # Format of download/preview links. If false, use Workbench's
+  # download facility.
+  #
+  # Examples:
+  # keep_web_url: https://%{uuid_or_pdh}.dl.zzzzz.your.domain
+  # keep_web_url: https://%{uuid_or_pdh}--dl.zzzzz.your.domain
+  keep_web_url: false
diff --git a/apps/workbench/test/controllers/collections_controller_test.rb b/apps/workbench/test/controllers/collections_controller_test.rb
index 13644e0..b4e7dd3 100644
--- a/apps/workbench/test/controllers/collections_controller_test.rb
+++ b/apps/workbench/test/controllers/collections_controller_test.rb
@@ -514,4 +514,55 @@ class CollectionsControllerTest < ActionController::TestCase
     get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
     assert_not_includes @response.body, '<a href="#Upload"'
   end
+
+  def setup_for_keep_web cfg='https://%{uuid_or_pdh}.dl.zzzzz.example'
+    Rails.configuration.keep_web_url = cfg
+    @controller.expects(:file_enumerator).never
+  end
+
+  %w(uuid portable_data_hash).each do |id_type|
+    test "Redirect to keep_web_url via #{id_type}" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with reader token" do
+      setup_for_keep_web
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z", reader_token: tok}, session_for(:expired)
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/t=#{tok}/_/w+a+z", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} with no token" do
+      setup_for_keep_web
+      Rails.configuration.anonymous_user_token =
+        api_fixture('api_client_authorizations')['anonymous']['api_token']
+      id = api_fixture('collections')['public_text_file'][id_type]
+      get :show_file, {uuid: id, file: "Hello World.txt"}
+      assert_response :redirect
+      assert_equal "https://#{id.sub '+', '-'}.dl.zzzzz.example/_/Hello+World.txt", @response.redirect_url
+    end
+
+    test "Redirect to keep_web_url via #{id_type} using -attachment-only-host mode" do
+      setup_for_keep_web 'https://dl.zzzzz.example/c=%{uuid_or_pdh}'
+      tok = api_fixture('api_client_authorizations')['active']['api_token']
+      id = api_fixture('collections')['w_a_z_file'][id_type]
+      get :show_file, {uuid: id, file: "w a z"}, session_for(:active)
+      assert_response :redirect
+      assert_equal "https://dl.zzzzz.example/c=#{id.sub '+', '-'}/_/w+a+z?api_token=#{tok}", @response.redirect_url
+    end
+  end
+
+  test "No redirect to keep_web_url if collection not found" do
+    setup_for_keep_web
+    id = api_fixture('collections')['w_a_z_file']['uuid']
+    get :show_file, {uuid: id, file: "w a z"}, session_for(:spectator)
+    assert_response 404
+  end
 end
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 8ae9490..cc47ebe 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -94,8 +94,8 @@
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
-//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
-//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
 //
 // An additional form is supported specifically to make it more
 // convenient to maintain support for existing Workbench download

commit 600f427244a0242d26e9fe027916e4146034dbf4
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Oct 1 22:16:51 2015 -0400

    5824: gofmt

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index b39a941..9751cd1 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -21,9 +21,9 @@ import (
 type handler struct{}
 
 var (
-	clientPool      = arvadosclient.MakeClientPool()
-	trustAllContent = false
-	anonymousTokens []string
+	clientPool         = arvadosclient.MakeClientPool()
+	trustAllContent    = false
+	anonymousTokens    []string
 	attachmentOnlyHost = ""
 )
 
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index a64aeb5..9b5ab2a 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -16,7 +16,7 @@ import (
 
 var _ = check.Suite(&UnitSuite{})
 
-type UnitSuite struct {}
+type UnitSuite struct{}
 
 func mustParseURL(s string) *url.URL {
 	r, err := url.Parse(s)
@@ -34,7 +34,7 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 		resp := httptest.NewRecorder()
 		req := &http.Request{
 			Method: "GET",
-			URL: mustParseURL(testURL),
+			URL:    mustParseURL(testURL),
 		}
 		(&handler{}).ServeHTTP(resp, req)
 		c.Check(resp.Code, check.Equals, http.StatusNotFound)
@@ -52,7 +52,7 @@ func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
 	doVhostRequests(c, authzViaAuthzHeader)
 }
 func authzViaAuthzHeader(r *http.Request, tok string) int {
-	r.Header.Add("Authorization", "OAuth2 " + tok)
+	r.Header.Add("Authorization", "OAuth2 "+tok)
 	return http.StatusUnauthorized
 }
 
@@ -61,7 +61,7 @@ func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
 }
 func authzViaCookieValue(r *http.Request, tok string) int {
 	r.AddCookie(&http.Cookie{
-		Name: "api_token",
+		Name:  "api_token",
 		Value: auth.EncodeTokenCookie([]byte(tok)),
 	})
 	return http.StatusUnauthorized
@@ -120,8 +120,8 @@ func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string)
 		u := mustParseURL("http://" + hostPath)
 		req := &http.Request{
 			Method: "GET",
-			Host: u.Host,
-			URL: u,
+			Host:   u.Host,
+			URL:    u,
 			Header: http.Header{},
 		}
 		failCode := authz(req, tok)
@@ -157,8 +157,8 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
 	u, _ := req.URL.Parse(resp.Header().Get("Location"))
 	req = &http.Request{
 		Method: "GET",
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{},
 	}
 	for _, c := range cookies {
@@ -169,8 +169,8 @@ func doReq(req *http.Request) *httptest.ResponseRecorder {
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		arvadostest.FooCollection + ".example.com/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		arvadostest.FooCollection+".example.com/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -179,8 +179,8 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 
 func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusBadRequest,
@@ -193,8 +193,8 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 	}(trustAllContent)
 	trustAllContent = true
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -208,16 +208,16 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *chec
 	attachmentOnlyHost = "example.com:1234"
 
 	s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusBadRequest,
 	)
 
 	resp := s.testVhostRedirectTokenToCookie(c, "GET",
-		"example.com:1234/c=" + arvadostest.FooCollection + "/foo",
-		"?api_token=" + arvadostest.ActiveToken,
+		"example.com:1234/c="+arvadostest.FooCollection+"/foo",
+		"?api_token="+arvadostest.ActiveToken,
 		"text/plain",
 		"",
 		http.StatusOK,
@@ -227,7 +227,7 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *chec
 
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
-		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection+".example.com/foo",
 		"",
 		"application/x-www-form-urlencoded",
 		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
@@ -237,7 +237,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
-		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection+".example.com/foo",
 		"",
 		"application/x-www-form-urlencoded",
 		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
@@ -249,10 +249,10 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	u, _ := url.Parse(`http://` + hostPath + queryString)
 	req := &http.Request{
 		Method: method,
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{"Content-Type": {contentType}},
-		Body: ioutil.NopCloser(strings.NewReader(body)),
+		Body:   ioutil.NopCloser(strings.NewReader(body)),
 	}
 
 	resp := httptest.NewRecorder()
@@ -261,14 +261,14 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 		c.Assert(resp.Code, check.Equals, expectStatus)
 		return resp
 	}
-	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
 
 	u, _ = u.Parse(resp.Header().Get("Location"))
 	req = &http.Request{
 		Method: "GET",
-		Host: u.Host,
-		URL: u,
+		Host:   u.Host,
+		URL:    u,
 		Header: http.Header{},
 	}
 	for _, c := range cookies {
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index fdbb50e..740d243 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -105,14 +105,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	err = arv.Create("collections",
 		map[string]interface{}{
 			"collection": map[string]interface{}{
-				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"name":          fmt.Sprintf("testdata blocksize=%d", blocksize),
 				"manifest_text": mtext,
 			},
 		}, &coll)
 	c.Assert(err, check.Equals, nil)
 	uuid := coll["uuid"].(string)
 
-	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid+".dl.example.com", "/testdata.bin")
 	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
 	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
@@ -139,82 +139,82 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	for _, spec := range []curlCase{
 		// My collection
 		{
-			auth: arvadostest.ActiveToken,
-			host: arvadostest.FooCollection + "--dl.example.com",
-			path: "/foo",
+			auth:    arvadostest.ActiveToken,
+			host:    arvadostest.FooCollection + "--dl.example.com",
+			path:    "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
-			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			host:    strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path:    "/t=" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			path:    "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			path:    "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: "tokensobogus",
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    "tokensobogus",
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: arvadostest.ActiveToken,
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    arvadostest.ActiveToken,
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 		{
-			auth: arvadostest.AnonymousToken,
-			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			auth:    arvadostest.AnonymousToken,
+			path:    "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
 			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
 		},
 
 		// Anonymously accessible user agreement
 		{
-			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			path:    "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			host: arvadostest.HelloWorldCollection + ".dl.example.com",
-			path: "/Hello%20world.txt",
+			host:    arvadostest.HelloWorldCollection + ".dl.example.com",
+			path:    "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			host: arvadostest.HelloWorldCollection + ".dl.example.com",
-			path: "/_/Hello%20world.txt",
+			host:    arvadostest.HelloWorldCollection + ".dl.example.com",
+			path:    "/_/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.ActiveToken,
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			auth:    arvadostest.ActiveToken,
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			path:    "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			host: arvadostest.HelloWorldCollection + "--dl.example.com",
-			path: "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			host:    arvadostest.HelloWorldCollection + "--dl.example.com",
+			path:    "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 		{
-			auth: arvadostest.SpectatorToken,
-			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			auth:    arvadostest.SpectatorToken,
+			path:    "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
 			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
 		},
 	} {
@@ -238,7 +238,7 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
 	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
-	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
+	curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}

commit 44a382de4e7b8986c17e5baeae94f8e521d923b6
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Sep 7 03:43:59 2015 -0400

    5824: Add -attachment-only-host feature.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 993b9db..8ae9490 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -173,6 +173,19 @@
 // (``https://dl.example.com/'') and upload it to some other site
 // chosen by the author of collection X.
 //
+// Attachment-Only host
+//
+// It is possible to serve untrusted content and accept user
+// credentials at the same origin as long as the content is only
+// downloaded, never executed by browsers. A single origin (hostname
+// and port) can be designated as an "attachment-only" origin: cookies
+// will be accepted and all responses will have a
+// "Content-Disposition: attachment" header. This behavior is invoked
+// only when the designated origin matches exactly the Host header
+// provided by the client or upstream proxy.
+//
+//   keep-web -attachment-only-host domain.example:9999
+//
 // Trust All Content mode
 //
 // In "trust all content" mode, Keep-web will accept credentials (API
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index c5d439a..b39a941 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -24,11 +24,14 @@ var (
 	clientPool      = arvadosclient.MakeClientPool()
 	trustAllContent = false
 	anonymousTokens []string
+	attachmentOnlyHost = ""
 )
 
 func init() {
 	flag.BoolVar(&trustAllContent, "trust-all-content", false,
 		"Serve non-public content from a single origin. Dangerous: read docs before using!")
+	flag.StringVar(&attachmentOnlyHost, "attachment-only-host", "",
+		"Accept credentials, and add \"Content-Disposition: attachment\" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or SSL.")
 }
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
@@ -111,8 +114,16 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var attachment bool
 	credentialsOK := trustAllContent
 
+	if r.Host != "" && r.Host == attachmentOnlyHost {
+		credentialsOK = true
+		attachment = true
+	} else if r.FormValue("disposition") == "attachment" {
+		attachment = true
+	}
+
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
 		// http://ID.dl.example/PATH...
 		credentialsOK = true
@@ -293,6 +304,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
+	if attachment {
+		w.Header().Set("Content-Disposition", "attachment")
+	}
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index e2f8edd..a64aeb5 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -201,6 +201,30 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C
 	)
 }
 
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
+	defer func(orig string) {
+		attachmentOnlyHost = orig
+	}(attachmentOnlyHost)
+	attachmentOnlyHost = "example.com:1234"
+
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusBadRequest,
+	)
+
+	resp := s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com:1234/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+	c.Check(resp.Header().Get("Content-Disposition"), check.Equals, "attachment")
+}
+
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
 		arvadostest.FooCollection + ".example.com/foo",
@@ -221,7 +245,7 @@ func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C)
 	)
 }
 
-func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) *httptest.ResponseRecorder {
 	u, _ := url.Parse(`http://` + hostPath + queryString)
 	req := &http.Request{
 		Method: method,
@@ -235,7 +259,7 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	(&handler{}).ServeHTTP(resp, req)
 	if resp.Code != http.StatusSeeOther {
 		c.Assert(resp.Code, check.Equals, expectStatus)
-		return
+		return resp
 	}
 	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
@@ -258,4 +282,5 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 	if expectStatus == http.StatusOK {
 		c.Check(resp.Body.String(), check.Equals, "foo")
 	}
+	return resp
 }

commit 1f04571a18a218c0968c92609a481913ec134171
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Sep 7 02:39:10 2015 -0400

    5824: Implement "trust all content" mode.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 2f45781..993b9db 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -49,7 +49,7 @@
 // The following "same origin" URL patterns are supported for public
 // collections (i.e., collections which can be served by keep-web
 // without making use of any credentials supplied by the client). See
-// "Same-origin mode" below.
+// "Same-origin URLs" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
 //   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
@@ -163,7 +163,7 @@
 // the local network -- the upstream proxy should configured to return
 // 401 for all paths beginning with "/c=".
 //
-// Same-origin mode
+// Same-origin URLs
 //
 // Without the same-origin protection outlined above, a web page
 // stored in collection X could execute JavaScript code that uses the
@@ -173,19 +173,7 @@
 // (``https://dl.example.com/'') and upload it to some other site
 // chosen by the author of collection X.
 //
-package main
-
-// TODO(TC): Implement?
-//
-// Trusted content
-//
-// Normally, Keep-web is installed using a wildcard DNS entry and a
-// wildcard HTTPS certificate, serving data from collection X at
-// ``https://X--dl.example.com/path/file.ext''.
-//
-// It will also serve publicly accessible data at
-// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
-// accept any kind of credentials at paths like these.
+// Trust All Content mode
 //
 // In "trust all content" mode, Keep-web will accept credentials (API
 // tokens) and serve any collection X at
@@ -198,4 +186,4 @@ package main
 //
 //   keep-web -trust-all-content [...]
 //
-// In the general case, this should not be enabled: 
+package main
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 600e685..c5d439a 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"flag"
 	"fmt"
 	"html"
 	"io"
@@ -19,8 +20,16 @@ import (
 
 type handler struct{}
 
-var clientPool = arvadosclient.MakeClientPool()
-var anonymousTokens []string
+var (
+	clientPool      = arvadosclient.MakeClientPool()
+	trustAllContent = false
+	anonymousTokens []string
+)
+
+func init() {
+	flag.BoolVar(&trustAllContent, "trust-all-content", false,
+		"Serve non-public content from a single origin. Dangerous: read docs before using!")
+}
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
@@ -102,7 +111,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	var credentialsOK bool
+	credentialsOK := trustAllContent
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
 		// http://ID.dl.example/PATH...
@@ -139,7 +148,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if !credentialsOK {
 			// It is not safe to copy the provided token
 			// into a cookie unless the current vhost
-			// (origin) serves only a single collection.
+			// (origin) serves only a single collection or
+			// we are in trustAllContent mode.
 			statusCode = http.StatusBadRequest
 			return
 		}
@@ -160,7 +170,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			Name:     "api_token",
 			Value:    auth.EncodeTokenCookie([]byte(t)),
 			Path:     "/",
-			Expires:  time.Now().AddDate(10,0,0),
+			Expires:  time.Now().AddDate(10, 0, 0),
 			HttpOnly: true,
 		})
 		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 50fd717..e2f8edd 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -177,6 +177,30 @@ func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
 	)
 }
 
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenSingleOriginError(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusBadRequest,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
+	defer func(orig bool) {
+		trustAllContent = orig
+	}(trustAllContent)
+	trustAllContent = true
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		"example.com/c=" + arvadostest.FooCollection + "/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
 func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
 	s.testVhostRedirectTokenToCookie(c, "POST",
 		arvadostest.FooCollection + ".example.com/foo",
@@ -209,7 +233,10 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
 
 	resp := httptest.NewRecorder()
 	(&handler{}).ServeHTTP(resp, req)
-	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	if resp.Code != http.StatusSeeOther {
+		c.Assert(resp.Code, check.Equals, expectStatus)
+		return
+	}
 	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
 	cookies := (&http.Response{Header: resp.Header()}).Cookies()
 

commit 9fa083d19ca4c88c7e2802852b6a89da0575c73e
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 30 02:45:58 2015 -0400

    5824: Add read-error and lots-of-blocks tests.

diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index 51710b7..94e41e2 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -2,21 +2,48 @@ package keepclient
 
 import (
 	"crypto/md5"
+	"crypto/rand"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
+	"strconv"
+	"strings"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
 	check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&IntegrationSuite{})
+var _ = check.Suite(&CollectionReaderUnit{})
 
-// IntegrationSuite tests need an API server
-type IntegrationSuite struct{}
+type CollectionReaderUnit struct {
+	arv     arvadosclient.ArvadosClient
+	kc      *KeepClient
+	handler SuccessHandler
+}
+
+func (s *CollectionReaderUnit) SetUpTest(c *check.C) {
+	var err error
+	s.arv, err = arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	s.arv.ApiToken = arvadostest.ActiveToken
+
+	s.kc, err = MakeKeepClient(&s.arv)
+	c.Assert(err, check.IsNil)
+
+	s.handler = SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(s.handler, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	s.kc.SetServiceRoots(localRoots, localRoots, nil)
+}
 
 type SuccessHandler struct {
 	disk map[string][]byte
@@ -64,33 +91,11 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
-func StubWithFakeServers(kc *KeepClient, h http.Handler) {
-	localRoots := make(map[string]string)
-	for i, k := range RunSomeFakeKeepServers(h, 4) {
-		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
-	}
-	kc.SetServiceRoots(localRoots, localRoots, nil)
-}
-
-func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	{
-		h := SuccessHandler{
-			disk: make(map[string][]byte),
-			lock: make(chan struct{}, 1),
-		}
-		StubWithFakeServers(kc, h)
-		kc.PutB([]byte("foo"))
-		kc.PutB([]byte("bar"))
-		kc.PutB([]byte("Hello world\n"))
-		kc.PutB([]byte(""))
-	}
+func (s *CollectionReaderUnit) TestCollectionReaderContent(c *check.C) {
+	s.kc.PutB([]byte("foo"))
+	s.kc.PutB([]byte("bar"))
+	s.kc.PutB([]byte("Hello world\n"))
+	s.kc.PutB([]byte(""))
 
 	mt := arvadostest.PathologicalManifest
 
@@ -116,7 +121,7 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		{mt: mt, f: "segmented/frob", want: "frob"},
 		{mt: mt, f: "segmented/oof", want: "oof"},
 	} {
-		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
 		switch want := testCase.want.(type) {
 		case error:
 			c.Check(rdr, check.IsNil)
@@ -136,21 +141,34 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	}
 }
 
-func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
-	arv, err := arvadosclient.MakeArvadosClient()
-	c.Assert(err, check.IsNil)
-	arv.ApiToken = arvadostest.ActiveToken
-
-	kc, err := MakeKeepClient(&arv)
-	c.Assert(err, check.IsNil)
-
-	h := SuccessHandler{
-		disk: make(map[string][]byte),
-		lock: make(chan struct{}, 1),
-		ops: new(int),
+func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
+	h := md5.New()
+	buf := make([]byte, 4096)
+	locs := make([]string, len(buf))
+	filesize := 0
+	for i := 0; i < len(locs); i++ {
+		_, err := io.ReadFull(rand.Reader, buf[:i])
+		c.Assert(err, check.IsNil)
+		h.Write(buf[:i])
+		locs[i], _, err = s.kc.PutB(buf[:i])
+		c.Assert(err, check.IsNil)
+		filesize += i
 	}
-	StubWithFakeServers(kc, h)
-	kc.PutB([]byte("foo"))
+	manifest := "./random " + strings.Join(locs, " ") + " 0:" + strconv.Itoa(filesize) + ":bytes.bin\n"
+	dataMD5 := h.Sum(nil)
+
+	checkMD5 := md5.New()
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "random/bytes.bin")
+	c.Check(err, check.IsNil)
+	_, err = io.Copy(checkMD5, rdr)
+	c.Check(err, check.IsNil)
+	_, err = rdr.Read(make([]byte, 1))
+	c.Check(err, check.Equals, io.EOF)
+	c.Check(checkMD5.Sum(nil), check.DeepEquals, dataMD5)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+	s.kc.PutB([]byte("foo"))
 
 	mt := ". "
 	for i := 0; i < 1000; i++ {
@@ -161,23 +179,45 @@ func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
 	// Grab the stub server's lock, ensuring our cfReader doesn't
 	// get anything back from its first call to kc.Get() before we
 	// have a chance to call Close().
-	h.lock <- struct{}{}
-	opsBeforeRead := *h.ops
+	s.handler.lock <- struct{}{}
+	opsBeforeRead := *s.handler.ops
 
-	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
 	c.Assert(err, check.IsNil)
+
+	firstReadDone := make(chan struct{})
+	go func() {
+		rdr.Read(make([]byte, 6))
+		firstReadDone <- struct{}{}
+	}()
 	err = rdr.Close()
 	c.Assert(err, check.IsNil)
 	c.Assert(rdr.Error(), check.IsNil)
 
 	// Release the stub server's lock. The first GET operation will proceed.
-	<-h.lock
+	<-s.handler.lock
+
+	// Make sure our first read operation consumes the data
+	// received from the first GET.
+	<-firstReadDone
 
 	// doGet() should close toRead before sending any more bufs to it.
-	if what, ok := <-rdr.toRead;  ok {
-		c.Errorf("Got %+v, expected toRead to be closed", what)
+	if what, ok := <-rdr.toRead; ok {
+		c.Errorf("Got %q, expected toRead to be closed", string(what))
 	}
 
 	// Stub should have handled exactly one GET request.
-	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+	c.Assert(*s.handler.ops, check.Equals, opsBeforeRead+1)
+}
+
+func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
+	manifest := ". ffffffffffffffffffffffffffffffff+1 0:1:notfound.txt\n"
+	buf := make([]byte, 1)
+	rdr, err := s.kc.CollectionFileReader(map[string]interface{}{"manifest_text": manifest}, "notfound.txt")
+	c.Check(err, check.IsNil)
+	for i := 0; i < 2; i++ {
+		_, err = io.ReadFull(rdr, buf)
+		c.Check(err, check.Not(check.IsNil))
+		c.Check(err, check.Not(check.Equals), io.EOF)
+	}
 }

commit 35d3f6566426986cc6da4a5ec4c4844cbbd17ce2
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 30 02:44:18 2015 -0400

    5824: Turn off debug printfs unless enabled by calling program.

diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 8b7cf41..fc26f3e 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -11,7 +11,6 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/streamer"
 	"io"
 	"io/ioutil"
-	"log"
 	"net/http"
 	"os"
 	"regexp"
@@ -161,8 +160,8 @@ func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error)
 			Check:  locator[0:32],
 		}, resp.ContentLength, url, nil
 	}
-	log.Printf("DEBUG: GET %s failed: %v", locator, errs)
-	return nil, 0, "", BlockNotFound
+	DebugPrintf("DEBUG: GET %s failed: %v", locator, errs)
+	return nil, 0, "", fmt.Errorf("%v", errs)
 }
 
 // Ask() verifies that a block with the given hash is available and
diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index ee60d28..462fea7 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -126,10 +126,8 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
 		make(chan string)}
 
 	UploadToStubHelper(c, st,
-		func(kc *KeepClient, url string, reader io.ReadCloser,
-			writer io.WriteCloser, upload_status chan uploadStatus) {
-
-			go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
+		func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
+			go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), []byte{0})
 
 			writer.Write([]byte("foo"))
 			writer.Close()
@@ -153,15 +151,14 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
 		make(chan string)}
 
 	UploadToStubHelper(c, st,
-		func(kc *KeepClient, url string, reader io.ReadCloser,
-			writer io.WriteCloser, upload_status chan uploadStatus) {
+		func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
 
 			tr := streamer.AsyncStreamFromReader(512, reader)
 			defer tr.Close()
 
 			br1 := tr.MakeStreamReader()
 
-			go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, "TestUploadToStubKeepServerBufferReader")
+			go kc.uploadToKeepServer(url, st.expectPath, br1, upload_status, 3, []byte{0})
 
 			writer.Write([]byte("foo"))
 			writer.Close()
@@ -193,10 +190,9 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
 	hash := "acbd18db4cc2f85cedef654fccc4a4d8"
 
 	UploadToStubHelper(c, st,
-		func(kc *KeepClient, url string, reader io.ReadCloser,
-			writer io.WriteCloser, upload_status chan uploadStatus) {
+		func(kc *KeepClient, url string, reader io.ReadCloser, writer io.WriteCloser, upload_status chan uploadStatus) {
 
-			go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
+			go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, []byte{0})
 
 			writer.Write([]byte("foo"))
 			writer.Close()
diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go
index 51e3e08..ab7e3c4 100644
--- a/sdk/go/keepclient/support.go
+++ b/sdk/go/keepclient/support.go
@@ -2,18 +2,23 @@ package keepclient
 
 import (
 	"crypto/md5"
+	"crypto/rand"
 	"errors"
 	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/streamer"
 	"io"
 	"io/ioutil"
-	"log"
 	"net"
 	"net/http"
 	"strings"
 	"time"
 )
 
+// DebugPrintf is the function used to emit debug messages. The
+// easiest way to enable keepclient debug messages in your application
+// is to assign log.Printf to DebugPrintf.
+var DebugPrintf = func(string, ...interface{}) {}
+
 type keepService struct {
 	Uuid     string `json:"uuid"`
 	Hostname string `json:"service_host"`
@@ -150,13 +155,13 @@ type uploadStatus struct {
 }
 
 func (this KeepClient) uploadToKeepServer(host string, hash string, body io.ReadCloser,
-	upload_status chan<- uploadStatus, expectedLength int64, requestId string) {
+	upload_status chan<- uploadStatus, expectedLength int64, requestId []byte) {
 
 	var req *http.Request
 	var err error
 	var url = fmt.Sprintf("%s/%s", host, hash)
 	if req, err = http.NewRequest("PUT", url, nil); err != nil {
-		log.Printf("[%v] Error creating request PUT %v error: %v", requestId, url, err.Error())
+		DebugPrintf("[%x] Error creating request PUT %v error: %v", requestId, url, err.Error())
 		upload_status <- uploadStatus{err, url, 0, 0, ""}
 		body.Close()
 		return
@@ -181,7 +186,7 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 
 	var resp *http.Response
 	if resp, err = this.Client.Do(req); err != nil {
-		log.Printf("[%v] Upload failed %v error: %v", requestId, url, err.Error())
+		DebugPrintf("[%x] Upload failed %v error: %v", requestId, url, err.Error())
 		upload_status <- uploadStatus{err, url, 0, 0, ""}
 		return
 	}
@@ -197,13 +202,13 @@ func (this KeepClient) uploadToKeepServer(host string, hash string, body io.Read
 	respbody, err2 := ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
 	response := strings.TrimSpace(string(respbody))
 	if err2 != nil && err2 != io.EOF {
-		log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
+		DebugPrintf("[%x] Upload %v error: %v response: %v", requestId, url, err2.Error(), response)
 		upload_status <- uploadStatus{err2, url, resp.StatusCode, rep, response}
 	} else if resp.StatusCode == http.StatusOK {
-		log.Printf("[%v] Upload %v success", requestId, url)
+		DebugPrintf("[%x] Upload %v success", requestId, url)
 		upload_status <- uploadStatus{nil, url, resp.StatusCode, rep, response}
 	} else {
-		log.Printf("[%v] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
+		DebugPrintf("[%x] Upload %v error: %v response: %v", requestId, url, resp.StatusCode, response)
 		upload_status <- uploadStatus{errors.New(resp.Status), url, resp.StatusCode, rep, response}
 	}
 }
@@ -213,9 +218,10 @@ func (this KeepClient) putReplicas(
 	tr *streamer.AsyncStream,
 	expectedLength int64) (locator string, replicas int, err error) {
 
-	// Take the hash of locator and timestamp in order to identify this
-	// specific transaction in log statements.
-	requestId := fmt.Sprintf("%x", md5.Sum([]byte(locator+time.Now().String())))[0:8]
+	// Generate an arbitrary ID to identify this specific
+	// transaction in debug logs.
+	requestId := make([]byte, 4)
+	io.ReadFull(rand.Reader, requestId)
 
 	// Calculate the ordering for uploading to servers
 	sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
@@ -253,7 +259,7 @@ func (this KeepClient) putReplicas(
 		for active*replicasPerThread < remaining_replicas {
 			// Start some upload requests
 			if next_server < len(sv) {
-				log.Printf("[%v] Begin upload %s to %s", requestId, hash, sv[next_server])
+				DebugPrintf("[%x] Begin upload %s to %s", requestId, hash, sv[next_server])
 				go this.uploadToKeepServer(sv[next_server], hash, tr.MakeStreamReader(), upload_status, expectedLength, requestId)
 				next_server += 1
 				active += 1
@@ -265,8 +271,7 @@ func (this KeepClient) putReplicas(
 				}
 			}
 		}
-		log.Printf("[%v] Replicas remaining to write: %v active uploads: %v",
-			requestId, remaining_replicas, active)
+		DebugPrintf("[%x] Replicas remaining to write: %v active uploads: %v", requestId, remaining_replicas, active)
 
 		// Now wait for something to happen.
 		status := <-upload_status
diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index 8e734f7..c8f3795 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -84,6 +84,7 @@ func main() {
 		log.Fatalf("Error setting up arvados client %s", err.Error())
 	}
 
+	keepclient.DebugPrintf = log.Printf
 	kc, err := keepclient.MakeKeepClient(&arv)
 	if err != nil {
 		log.Fatalf("Error setting up keep client %s", err.Error())

commit 391fbe89b56b718f674822534c34ff80aa107576
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 11:08:16 2015 -0400

    5824: Fix up DNS docs.

diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
index 51b8d71..9282a8e 100644
--- a/doc/install/install-keep-web.html.textile.liquid
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -95,22 +95,20 @@ server {
 }
 </pre></notextile>
 
-h3. Tell the API server about the keep-web service
+h3. Configure DNS
 
-If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
+Configure your DNS servers so the following names resolve to your Nginx proxy's public IP address.
+* @*--dl.uuid_prefix.your.domain@, if your DNS server allows this without interfering with other DNS names; or
+* @*.dl.uuid_prefix.your.domain@, if you have a wildcard SSL certificate valid for these names; or
+* @dl.uuid_prefix.your.domain@, if neither of the above options is feasible. In this case, only unauthenticated requests will be served, i.e., public data and collection sharing links.
 
-<notextile>
-<pre><code>keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
-</code></pre>
-</notextile>
+h3. Tell the API server about the keep-web service
 
-If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+Add *one* of the following entries to your API server's @config/application.yml@ file, depending on your DNS setup:
 
 <notextile>
 <pre><code>keep-web: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
+keep-web: https://dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
-
-h3. Configure DNS
-
-Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.

commit 796a00b5887121e462f8a82391bac125120c2841
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 02:30:45 2015 -0400

    5824: Log X-Forwarded-For header value if provided.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 98dfdb3..600e685 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -62,6 +62,11 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
 
+	remoteAddr := r.RemoteAddr
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		remoteAddr = xff + "," + remoteAddr
+	}
+
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
 		if statusCode == 0 {
@@ -75,7 +80,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
+		httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
 	if r.Method != "GET" && r.Method != "POST" {

commit a5741dcc18ae0862b220ed08d4db136cfa979ec5
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:45:35 2015 -0400

    5824: Fail at startup if ARVADOS_API_HOST is not set.

diff --git a/services/keep-web/main.go b/services/keep-web/main.go
index d780cc3..751543e 100644
--- a/services/keep-web/main.go
+++ b/services/keep-web/main.go
@@ -17,6 +17,9 @@ func init() {
 
 func main() {
 	flag.Parse()
+	if os.Getenv("ARVADOS_API_HOST") == "" {
+		log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+	}
 	srv := &server{}
 	if err := srv.Start(); err != nil {
 		log.Fatal(err)

commit eeda48bc31d5cd03d3a72becaaac4d643c9de46d
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:28:43 2015 -0400

    5824: Accept anonymous tokens on command line.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 7a2124a..98dfdb3 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -17,16 +17,10 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
-var clientPool = arvadosclient.MakeClientPool()
-
-var anonymousTokens []string
-
 type handler struct{}
 
-func init() {
-	// TODO(TC): Get anonymousTokens from flags
-	anonymousTokens = []string{}
-}
+var clientPool = arvadosclient.MakeClientPool()
+var anonymousTokens []string
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
index 44da00f..2359f23 100644
--- a/services/keep-web/server.go
+++ b/services/keep-web/server.go
@@ -10,8 +10,8 @@ import (
 var address string
 
 func init() {
-	flag.StringVar(&address, "address", "0.0.0.0:80",
-		"Address to listen on, \"host:port\".")
+	flag.StringVar(&address, "address", ":80",
+		"Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
 }
 
 type server struct {

commit 5ba24fbe0b0529bbbb5bda2790b61c35ef256469
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:09:46 2015 -0400

    5824: Handle various combinations of c= and t= more consistently. Use vhosts in integration tests.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 44bcaec..2f45781 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -52,13 +52,12 @@
 // "Same-origin mode" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
-//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
 //
 // The following "multiple origin" URL patterns are supported for all
 // collections:
 //
 //   http://uuid_or_pdh--dl.example.com/path/file.txt
-//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
 //   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
 //
 // In the "multiple origin" form, the string "--" can be replaced with
@@ -81,17 +80,35 @@
 // collection UUID or a portable data hash with the "+" character
 // replaced by "-".
 //
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
 // Assuming there is a collection with UUID
 // zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
 // 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
 // interchangeable:
 //
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
 //
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/path/file.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://dl.example.com/collections/uuid_or_pdh/path/file.txt
+//
 // Authorization mechanisms
 //
 // A token can be provided in an Authorization header:
@@ -158,7 +175,7 @@
 //
 package main
 
-// TODO(TC): Implement
+// TODO(TC): Implement?
 //
 // Trusted content
 //
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 657c72d..7a2124a 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -50,6 +50,20 @@ func parseCollectionIdFromDNSName(s string) string {
 	return ""
 }
 
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIdFromURL(s string) string {
+	if arvadosclient.UUIDMatch(s) {
+		return s
+	}
+	if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
@@ -89,79 +103,104 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var credentialsOK bool
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
-		// "http://{id}.domain.example.com/{path}" form
-		if t := r.FormValue("api_token"); t != "" {
-			// ...with explicit token in query string or
-			// form in POST body. We must encrypt the
-			// token such that it can only be used for
-			// this collection; put it in an HttpOnly
-			// cookie; and redirect to the same URL with
-			// the query param redacted, and method =
-			// GET.
-			//
-			// The HttpOnly flag is necessary to prevent
-			// JavaScript code (included in, or loaded by,
-			// a page in the collection being served) from
-			// employing the user's token beyond reading
-			// other files in the same domain, i.e., same
-			// the collection.
-			//
-			// The 303 redirect is necessary in the case
-			// of a GET request to avoid exposing the
-			// token in the Location bar, and in the case
-			// of a POST request to avoid raising warnings
-			// when the user refreshes the resulting page.
-			http.SetCookie(w, &http.Cookie{
-				Name:    "api_token",
-				Value:   auth.EncodeTokenCookie([]byte(t)),
-				Path:    "/",
-				Expires: time.Now().AddDate(10,0,0),
-			})
-			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
-
-			w.Header().Add("Location", redir)
-			statusCode, statusText = http.StatusSeeOther, redir
-			w.WriteHeader(statusCode)
-			io.WriteString(w, `<A href="`)
-			io.WriteString(w, html.EscapeString(redir))
-			io.WriteString(w, `">Continue</A>`)
-			return
-		} else if strings.HasPrefix(pathParts[0], "t=") {
-			// ...with explicit token in path,
-			// "{...}.com/t={token}/{path}".  This form
-			// must only be used to pass scoped tokens
-			// that give permission for a single
-			// collection. See FormValue case above.
-			tokens = []string{pathParts[0][2:]}
-			targetPath = pathParts[1:]
+		// http://ID.dl.example/PATH...
+		credentialsOK = true
+		targetPath = pathParts
+	} else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+		// /c=ID/PATH...
+		targetId = parseCollectionIdFromURL(pathParts[0][2:])
+		targetPath = pathParts[1:]
+	} else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+		if len(pathParts) >= 5 && pathParts[1] == "download" {
+			// /collections/download/ID/TOKEN/PATH...
+			targetId = pathParts[2]
+			tokens = []string{pathParts[3]}
+			targetPath = pathParts[4:]
 			pathToken = true
 		} else {
-			// ...with cookie, Authorization header, or
-			// no token at all
-			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-			tokens = append(reqTokens, anonymousTokens...)
-			targetPath = pathParts
+			// /collections/ID/PATH...
+			targetId = pathParts[1]
+			tokens = anonymousTokens
+			targetPath = pathParts[2:]
 		}
-	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+	} else {
 		statusCode = http.StatusNotFound
 		return
-	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
-		// "/collections/download/{id}/{token}/path..." form:
-		// Don't use our configured anonymous tokens,
-		// Authorization headers, etc.  Just use the token in
-		// the path.
-		targetId = pathParts[2]
-		tokens = []string{pathParts[3]}
-		targetPath = pathParts[4:]
+	}
+	if t := r.FormValue("api_token"); t != "" {
+		// The client provided an explicit token in the query
+		// string, or a form in POST body. We must put the
+		// token in an HttpOnly cookie, and redirect to the
+		// same URL with the query param redacted and method =
+		// GET.
+
+		if !credentialsOK {
+			// It is not safe to copy the provided token
+			// into a cookie unless the current vhost
+			// (origin) serves only a single collection.
+			statusCode = http.StatusBadRequest
+			return
+		}
+
+		// The HttpOnly flag is necessary to prevent
+		// JavaScript code (included in, or loaded by, a page
+		// in the collection being served) from employing the
+		// user's token beyond reading other files in the same
+		// domain, i.e., same collection.
+		//
+		// The 303 redirect is necessary in the case of a GET
+		// request to avoid exposing the token in the Location
+		// bar, and in the case of a POST request to avoid
+		// raising warnings when the user refreshes the
+		// resulting page.
+
+		http.SetCookie(w, &http.Cookie{
+			Name:     "api_token",
+			Value:    auth.EncodeTokenCookie([]byte(t)),
+			Path:     "/",
+			Expires:  time.Now().AddDate(10,0,0),
+			HttpOnly: true,
+		})
+		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+		w.Header().Add("Location", redir)
+		statusCode, statusText = http.StatusSeeOther, redir
+		w.WriteHeader(statusCode)
+		io.WriteString(w, `<A href="`)
+		io.WriteString(w, html.EscapeString(redir))
+		io.WriteString(w, `">Continue</A>`)
+		return
+	}
+
+	if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+		// http://ID.example/t=TOKEN/PATH...
+		// /c=ID/t=TOKEN/PATH...
+		//
+		// This form must only be used to pass scoped tokens
+		// that give permission for a single collection. See
+		// FormValue case above.
+		tokens = []string{targetPath[0][2:]}
 		pathToken = true
-	} else {
-		// "/collections/{id}/path..." form
-		targetId = pathParts[1]
-		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		targetPath = targetPath[1:]
+	}
+
+	if tokens == nil {
+		if credentialsOK {
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		}
 		tokens = append(reqTokens, anonymousTokens...)
-		targetPath = pathParts[2:]
+	}
+
+	if len(targetPath) > 0 && targetPath[0] == "_" {
+		// If a collection has a directory called "t=foo" or
+		// "_", it can be served at //dl.example/_/t=foo/ or
+		// //dl.example/_/_/ respectively: //dl.example/t=foo/
+		// won't work because t=foo will be interpreted as a
+		// token "foo".
+		targetPath = targetPath[1:]
 	}
 
 	tokenResult := make(map[string]int)
@@ -188,11 +227,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		return
 	}
 	if !found {
-		if pathToken {
-			// The URL is a "secret sharing link", but it
-			// didn't work out. Asking the client for
-			// additional credentials would just be
-			// confusing.
+		if pathToken || !credentialsOK {
+			// Either the URL is a "secret sharing link"
+			// that didn't work out (and asking the client
+			// for additional credentials would just be
+			// confusing), or we don't even accept
+			// credentials at this path.
 			statusCode = http.StatusNotFound
 			return
 		}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index b788a38..50fd717 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -99,8 +99,10 @@ func authzViaPOST(r *http.Request, tok string) int {
 func doVhostRequests(c *check.C, authz authorizer) {
 	for _, hostPath := range []string{
 		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/_/foo",
 		arvadostest.FooPdh + ".example.com/foo",
-		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--dl.example.com/foo",
 	} {
 		c.Log("doRequests: ", hostPath)
 		doVhostRequestsWithHostPath(c, authz, hostPath)
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index a2a5754..fdbb50e 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -29,7 +29,7 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"bogustoken",
 	} {
 		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
-		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
@@ -119,6 +119,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	c.Check(size, check.Equals, int64(blocksize)*100)
 }
 
+type curlCase struct {
+	id      string
+	auth    string
+	host    string
+	path    string
+	dataMD5 string
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -128,28 +136,101 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
-	for _, spec := range [][]string{
+	for _, spec := range []curlCase{
 		// My collection
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement.
-		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{
+			auth: arvadostest.ActiveToken,
+			host: arvadostest.FooCollection + "--dl.example.com",
+			path: "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: "tokensobogus",
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.AnonymousToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+
+		// Anonymously accessible user agreement
+		{
+			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/_/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			host: arvadostest.HelloWorldCollection + "--dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
 	} {
-		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
+		host := spec.host
+		if host == "" {
+			host = "dl.example.com"
+		}
+		hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
-		if strings.HasSuffix(spec[1], ".txt") {
+		if strings.HasSuffix(spec.path, ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
 			// TODO: Check some types that aren't
 			// automatically detected by Go's http server
 			// by sniffing the content.
 		}
-		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
 	}
 }
 

commit 7796b112345b302ff6108ff761ff0d3c871bf888
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:34:21 2015 -0400

    5824: Comment to explain "authorizer" test helpers.

diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 0494376..b788a38 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -42,6 +42,10 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 	}
 }
 
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keep-web if
+// the token is invalid.
 type authorizer func(*http.Request, string) int
 
 func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {

commit 8bb5dcb75f10e8128e2b4b5b741a96e781174997
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:33:44 2015 -0400

    5824: Fix up support for PDH in vhostname.

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index f4f9ab5..44bcaec 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -74,7 +74,8 @@
 // upstream proxy.
 //
 // In all of the above forms, the "dl.example.com" part can be
-// anything at all.
+// anything at all: keep-web ignores everything after the first "." or
+// "--".
 //
 // In all of the above forms, the "uuid_or_pdh" part can be either a
 // collection UUID or a portable data hash with the "+" character
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 30b4b64..657c72d 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -28,7 +28,8 @@ func init() {
 	anonymousTokens = []string{}
 }
 
-// return s if s is a UUID or a PDH, otherwise ""
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
 func parseCollectionIdFromDNSName(s string) string {
 	// Strip domain.
 	if i := strings.IndexRune(s, '.'); i >= 0 {
@@ -40,10 +41,13 @@ func parseCollectionIdFromDNSName(s string) string {
 	if i := strings.Index(s, "--"); i >= 0 {
 		s = s[:i]
 	}
-	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
-		return ""
+	if arvadosclient.UUIDMatch(s) {
+		return s
 	}
-	return s
+	if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
 }
 
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index a1f5e1a..0494376 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -93,7 +93,17 @@ func authzViaPOST(r *http.Request, tok string) int {
 // Try some combinations of {url, token} using the given authorization
 // mechanism, and verify the result is correct.
 func doVhostRequests(c *check.C, authz authorizer) {
-	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, hostPath := range []string{
+		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooPdh + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+	} {
+		c.Log("doRequests: ", hostPath)
+		doVhostRequestsWithHostPath(c, authz, hostPath)
+	}
+}
+
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
 	for _, tok := range []string{
 		arvadostest.ActiveToken,
 		arvadostest.ActiveToken[:15],

commit e0aa44d1f555056ba1dc8e866cf6ae50494cb3e2
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:31:19 2015 -0400

    5824: Modernize install page, cf. other services.

diff --git a/doc/_config.yml b/doc/_config.yml
index 1bdd2ab..d67668a 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -154,6 +154,7 @@ navbar:
       - install/create-standard-objects.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
+      - install/install-keep-web.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
     - Helpful hints:
diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
index 4777668..51b8d71 100644
--- a/doc/install/install-keep-web.html.textile.liquid
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -1,31 +1,33 @@
 ---
 layout: default
 navsection: installguide
-title: Install download server
+title: Install the download server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
-The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
 
 By convention, we use the following hostname for the download service:
 
-<div class="offset1">
-table(table table-bordered table-condensed).
-|dl.@uuid_prefix@.your.domain|
-</div>
+<notextile>
+<pre><code>dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
 
 This hostname should resolve from anywhere on the internet.
 
 h2. Install keep-web
 
-First add the Arvados apt repository, and then install the keep-web package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keep-web</span>
+<pre><code>~$ <span class="userinput">sudo yum install keep-web</span>
 </code></pre>
 </notextile>
 
@@ -34,31 +36,81 @@ Verify that @keep-web@ is functional:
 <notextile>
 <pre><code>~$ <span class="userinput">keep-web -h</span>
 Usage of keep-web:
-  -address="0.0.0.0:80": Address to listen on, "host:port".
+  -address string
+        Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
+  -anonymous-token value
+        API token to try when none of the tokens provided in an HTTP request succeed in reading the desired collection. If this flag is used more than once, each token will be attempted in turn until one works. (default [])
 </code></pre>
 </notextile>
 
-We recommend running @arv-git-httpd@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another:
+
+<notextile>
+<pre><code>/var/www/arvados-api/current/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
+hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
+</code></pre></notextile>
 
-Your @run@ script should look something like this:
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keep-web@ is:
 
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-exec sudo -u nobody keep-web -address=:9002 2>&1
+exec sudo -u nobody keep-web -address=<span class="userinput">:9002</span> -anonymous-token=<span class="userinput">hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r</span> 2>&1
 </code></pre>
 </notextile>
 
+Omit the @-anonymous-token@ arguments if you do not want to serve public data.
+
+Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's SSL certificate is not signed by a recognized CA.
+
 h3. Set up a reverse proxy with SSL support
 
 The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
 
 This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
 
+Note: A wildcard SSL certificate is required in order to proxy keep-web effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keep-web {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           dl.<span class="userinput">uuid_prefix</span>.your.domain *.dl.<span class="userinput">uuid_prefix</span>.your.domain ~.*--dl.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput">YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput">YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keep-web;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
 h3. Tell the API server about the keep-web service
 
-In your API server's config/application.yml file, add the following entry:
+If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
 
 <notextile>
-<pre><code>keep-web: dl.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>keep-web: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>
+
+If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file. This approach requires a wildcard DNS entry covering <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, which might be difficult to do without affecting other services, depending on your DNS server software.
+
+<notextile>
+<pre><code>keep-web: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+h3. Configure DNS
+
+Configure your DNS servers so <code>*--dl.<span class="userinput">uuid_prefix</span>.your.domain</code> or <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code> (according to the way you configured Workbench), as well as <code>dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, resolve to your Nginx proxy's public IP address.
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index 325668f..f4f9ab5 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -4,6 +4,8 @@
 // can be installed anywhere with access to Keep services, typically
 // behind a web proxy that supports TLS.
 //
+// See http://doc.arvados.org/install/install-keep-web.html.
+//
 // Starting the server
 //
 // Serve HTTP requests at port 1234 on all interfaces:

commit f04287b86a8d4e8b74cf8d894ef7ce5420364f7a
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:17:54 2015 -0400

    5824: Clarify difference between keepproxy and keepstore (bandwidth and convenience -- not security).

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 14b252f..3b658f8 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -4,9 +4,9 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for lower-bandwidth clients located elsewhere on the internet: a client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers. Keepproxy also accepts requests from clients that do not compute data hashes before uploading data: notably, the browser-based upload feature in Workbench requires Keepproxy.
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).

commit 91062e85e93d0d0ae557a478f1f83b133d4d2080
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:16:26 2015 -0400

    5824: Update keepproxy usage.

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 6a531a3..14b252f 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -36,12 +36,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index 7900096..8e734f7 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -37,7 +37,7 @@ func main() {
 		pidfile          string
 	)
 
-	flagset := flag.NewFlagSet("default", flag.ExitOnError)
+	flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
 	flagset.StringVar(
 		&listen,

commit b9d5acacf4ac027867e388040221101b73dbf118
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 16 00:16:27 2015 -0400

    5824: Fix up error checking and early-close behavior in CollectionFileReader.

diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
index 5db944c..0d05b8a 100644
--- a/sdk/go/keepclient/collectionreader.go
+++ b/sdk/go/keepclient/collectionreader.go
@@ -8,6 +8,17 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
+const (
+	// After reading a data block from Keep, cfReader slices it up
+	// and sends the slices to a buffered channel to be consumed
+	// by the caller via Read().
+	//
+	// dataSliceSize is the maximum size of the slices, and
+	// therefore the maximum number of bytes that will be returned
+	// by a single call to Read().
+	dataSliceSize = 1 << 20
+)
+
 // ErrNoManifest indicates the given collection has no manifest
 // information (e.g., manifest_text was excluded by a "select"
 // parameter when retrieving the collection record).
@@ -40,8 +51,10 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
 			}
 			q = append(q, seg)
 			r.totalSize += uint64(seg.Len)
-			// Send toGet whatever it's ready to receive.
-			Q: for len(q) > 0 {
+			// Send toGet as many segments as we can until
+			// it blocks.
+		Q:
+			for len(q) > 0 {
 				select {
 				case r.toGet <- q[0]:
 					q = q[1:]
@@ -75,84 +88,127 @@ type cfReader struct {
 	// doGet() reads FileSegments from toGet, gets the data from
 	// Keep, and sends byte slices to toRead to be consumed by
 	// Read().
-	toGet        chan *manifest.FileSegment
-	toRead       chan []byte
+	toGet chan *manifest.FileSegment
+	// toRead is a buffered channel, sized to fit one full Keep
+	// block. This lets us verify checksums without having a
+	// store-and-forward delay between blocks: by the time the
+	// caller starts receiving data from block N, cfReader is
+	// starting to fetch block N+1. A larger buffer would be
+	// useful for a caller whose read speed varies a lot.
+	toRead chan []byte
 	// bytes ready to send next time someone calls Read()
-	buf          []byte
+	buf []byte
 	// Total size of the file being read. Not safe to read this
 	// until countDone is closed.
-	totalSize    uint64
-	countDone    chan struct{}
+	totalSize uint64
+	countDone chan struct{}
 	// First error encountered.
-	err          error
+	err error
+	// errNotNil is closed IFF err contains a non-nil error.
+	// Receiving from it will block until an error occurs.
+	errNotNil chan struct{}
+	// rdrClosed is closed IFF the reader's Close() method has
+	// been called. Any goroutines associated with the reader will
+	// stop and free up resources when they notice this channel is
+	// closed.
+	rdrClosed chan struct{}
 }
 
-func (r *cfReader) Read(outbuf []byte) (n int, err error) {
-	if r.err != nil {
-		return 0, r.err
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+	if r.Error() != nil {
+		return 0, r.Error()
 	}
 	for r.buf == nil || len(r.buf) == 0 {
 		var ok bool
 		r.buf, ok = <-r.toRead
-		if r.err != nil {
-			return 0, r.err
+		if r.Error() != nil {
+			return 0, r.Error()
 		} else if !ok {
 			return 0, io.EOF
 		}
 	}
+	n := len(r.buf)
 	if len(r.buf) > len(outbuf) {
 		n = len(outbuf)
-	} else {
-		n = len(r.buf)
 	}
 	copy(outbuf[:n], r.buf[:n])
 	r.buf = r.buf[n:]
-	return
+	return n, nil
 }
 
 func (r *cfReader) Close() error {
-	_, _ = <-r.countDone
-	for _ = range r.toGet {
-	}
-	for _ = range r.toRead {
+	close(r.rdrClosed)
+	return r.Error()
+}
+
+func (r *cfReader) Error() error {
+	select {
+	case <-r.errNotNil:
+		return r.err
+	default:
+		return nil
 	}
-	return r.err
 }
 
 func (r *cfReader) Len() uint64 {
 	// Wait for all segments to be counted
-	_, _ = <-r.countDone
+	<-r.countDone
 	return r.totalSize
 }
 
 func (r *cfReader) doGet() {
 	defer close(r.toRead)
+GET:
 	for fs := range r.toGet {
 		rdr, _, _, err := r.keepClient.Get(fs.Locator)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
 		var buf = make([]byte, fs.Offset+fs.Len)
 		_, err = io.ReadFull(rdr, buf)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
-		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+		for bOff, bLen := fs.Offset, dataSliceSize; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
 			if bOff+bLen > fs.Offset+fs.Len {
 				bLen = fs.Offset + fs.Len - bOff
 			}
-			r.toRead <- buf[bOff : bOff+bLen]
+			select {
+			case r.toRead <- buf[bOff : bOff+bLen]:
+			case <-r.rdrClosed:
+				// Reader is closed: no point sending
+				// anything more to toRead.
+				break GET
+			}
+		}
+		// It is possible that r.rdrClosed is closed but we
+		// never noticed because r.toRead was also ready in
+		// every select{} above. Here we check before wasting
+		// a keepclient.Get() call.
+		select {
+		case <-r.rdrClosed:
+			break GET
+		default:
 		}
 	}
+	// In case we exited the above loop early: before returning,
+	// drain the toGet channel so its sender doesn't sit around
+	// blocking forever.
+	for _ = range r.toGet {
+	}
 }
 
 func newCFReader(kc *KeepClient) (r *cfReader) {
 	r = new(cfReader)
 	r.keepClient = kc
+	r.rdrClosed = make(chan struct{})
+	r.errNotNil = make(chan struct{})
 	r.toGet = make(chan *manifest.FileSegment, 2)
-	r.toRead = make(chan []byte)
+	r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
 	r.countDone = make(chan struct{})
 	go r.doGet()
 	return
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index f271208..51710b7 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -20,7 +20,8 @@ type IntegrationSuite struct{}
 
 type SuccessHandler struct {
 	disk map[string][]byte
-	lock chan struct{}
+	lock chan struct{}	// channel with buffer==1: full when an operation is in progress.
+	ops  *int		// number of operations completed
 }
 
 func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
@@ -34,12 +35,18 @@ func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
 		h.lock <- struct{}{}
 		h.disk[pdh] = buf
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		resp.Write([]byte(pdh))
 	case "GET":
 		pdh := req.URL.Path[1:]
 		h.lock <- struct{}{}
 		buf, ok := h.disk[pdh]
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		if !ok {
 			resp.WriteHeader(http.StatusNotFound)
@@ -57,6 +64,14 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
+func StubWithFakeServers(kc *KeepClient, h http.Handler) {
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(h, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
 func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.IsNil)
@@ -66,12 +81,11 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	c.Assert(err, check.IsNil)
 
 	{
-		localRoots := make(map[string]string)
-		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
-		for i, k := range RunSomeFakeKeepServers(h, 4) {
-			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		h := SuccessHandler{
+			disk: make(map[string][]byte),
+			lock: make(chan struct{}, 1),
 		}
-		kc.SetServiceRoots(localRoots, localRoots, nil)
+		StubWithFakeServers(kc, h)
 		kc.PutB([]byte("foo"))
 		kc.PutB([]byte("bar"))
 		kc.PutB([]byte("Hello world\n"))
@@ -121,3 +135,49 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		}
 	}
 }
+
+func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	h := SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	StubWithFakeServers(kc, h)
+	kc.PutB([]byte("foo"))
+
+	mt := ". "
+	for i := 0; i < 1000; i++ {
+		mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+	}
+	mt += "0:3000:foo1000.txt\n"
+
+	// Grab the stub server's lock, ensuring our cfReader doesn't
+	// get anything back from its first call to kc.Get() before we
+	// have a chance to call Close().
+	h.lock <- struct{}{}
+	opsBeforeRead := *h.ops
+
+	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	c.Assert(err, check.IsNil)
+	err = rdr.Close()
+	c.Assert(err, check.IsNil)
+	c.Assert(rdr.Error(), check.IsNil)
+
+	// Release the stub server's lock. The first GET operation will proceed.
+	<-h.lock
+
+	// doGet() should close toRead before sending any more bufs to it.
+	if what, ok := <-rdr.toRead;  ok {
+		c.Errorf("Got %+v, expected toRead to be closed", what)
+	}
+
+	// Stub should have handled exactly one GET request.
+	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+}

commit 8146d4cc8c3d52f548f6af66de26f30881ebec39
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 03:33:19 2015 -0400

    5824: Add Content-Length header.

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 03b3e26..30b4b64 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -239,6 +239,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			w.Header().Set("Content-Type", t)
 		}
 	}
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)

commit 2a54340956104d689ab52f60d82af5555a103919
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 01:56:55 2015 -0400

    5824: Use vhosts in curl integration tests. Add large file test.

diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index b4d6d17..a2a5754 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -3,6 +3,9 @@ package main
 import (
 	"crypto/md5"
 	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
 	"os/exec"
 	"strings"
 	"testing"
@@ -25,17 +28,17 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
+		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+			hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
 
-		hdr, body = s.runCurl(c, token, "/bad-route")
+		hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/bad-route")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 	}
@@ -64,12 +67,58 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
 		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
+		hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "dl.example.com", uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+	if testing.Short() {
+		c.Skip("skipping 1GB integration test in short mode")
+	}
+	s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+	s.test100BlockFile(c, 3000000)
+}
+
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+	testdata := make([]byte, blocksize)
+	for i := 0; i < blocksize; i++ {
+		testdata[i] = byte(' ')
+	}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = arvadostest.ActiveToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	loc, _, err := kc.PutB(testdata[:])
+	c.Assert(err, check.Equals, nil)
+	mtext := "."
+	for i := 0; i < 100; i++ {
+		mtext = mtext + " " + loc
+	}
+	mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+	coll := map[string]interface{}{}
+	err = arv.Create("collections",
+		map[string]interface{}{
+			"collection": map[string]interface{}{
+				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"manifest_text": mtext,
+			},
+		}, &coll)
+	c.Assert(err, check.Equals, nil)
+	uuid := coll["uuid"].(string)
+
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+	c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -86,19 +135,13 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement. These should
-		// start working when CollectionFileReader provides
-		// real data instead of fake/stub data.
+		// Anonymously accessible user agreement.
 		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
-		hdr, body := s.runCurl(c, spec[0], spec[1])
-		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
-			c.Log("Not implemented!")
-			continue
-		}
+		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 		if strings.HasSuffix(spec[1], ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
@@ -111,15 +154,34 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
+	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}
 	curlArgs = append(curlArgs, args...)
-	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
 	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
-	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	cmd := exec.Command("curl", curlArgs...)
+	stdout, err := cmd.StdoutPipe()
+	c.Assert(err, check.Equals, nil)
+	cmd.Stderr = cmd.Stdout
+	go cmd.Start()
+	buf := make([]byte, 2<<27)
+	n, err := io.ReadFull(stdout, buf)
+	// Discard (but measure size of) anything past 256 MiB (len(buf) == 2<<27).
+	var discarded int64
+	if err == io.ErrUnexpectedEOF {
+		err = nil
+		buf = buf[:n]
+	} else {
+		c.Assert(err, check.Equals, nil)
+		discarded, err = io.Copy(ioutil.Discard, stdout)
+		c.Assert(err, check.Equals, nil)
+	}
+	err = cmd.Wait()
 	// Without "-f", curl exits 0 as long as it gets a valid HTTP
 	// response from the server, even if the response status
 	// indicates that the request failed. In our test suite, we
@@ -127,10 +189,11 @@ func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string
 	// headers ourselves. If curl exits non-zero, our testing
 	// environment is broken.
 	c.Assert(err, check.Equals, nil)
-	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
 	c.Assert(len(hdrsAndBody), check.Equals, 2)
 	hdr = hdrsAndBody[0]
-	body = hdrsAndBody[1]
+	bodyPart = hdrsAndBody[1]
+	bodySize = int64(len(bodyPart)) + discarded
 	return
 }
 

commit 967632c79df826ab16a0449ff63b0b9f6d35c599
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:20:28 2015 -0400

    5824: Support vhost-based collection lookups.

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index 87b28f8..3040e0a 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -7,6 +7,8 @@ const (
 	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
 	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
 	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+	HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
 	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K at xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero at 0 0:1:f 1:0:zero at 1 1:4:ooba 4:0:zero at 4 5:1:r 5:4:rbaz 9:0:zero at 9\n" +
 		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
 		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
diff --git a/sdk/go/auth/auth.go b/sdk/go/auth/auth.go
index 4a719e9..41cfb99 100644
--- a/sdk/go/auth/auth.go
+++ b/sdk/go/auth/auth.go
@@ -1,6 +1,7 @@
 package auth
 
 import (
+	"encoding/base64"
 	"net/http"
 	"net/url"
 	"strings"
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
 	return c
 }
 
+// EncodeTokenCookie accepts a token and returns a string suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHttpRequest loads all tokens it can find in the
 // headers and query string of an http query.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,6 +61,8 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 		a.Tokens = append(a.Tokens, val...)
 	}
 
+	a.loadTokenFromCookie(r)
+
 	// TODO: Load token from Rails session cookie (if Rails site
 	// secret is known)
 }
@@ -59,3 +71,15 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
 // application.
+
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+	cookie, err := r.Cookie("api_token")
+	if err != nil || len(cookie.Value) == 0 {
+		return
+	}
+	token, err := DecodeTokenCookie(cookie.Value)
+	if err != nil {
+		return
+	}
+	a.Tokens = append(a.Tokens, string(token))
+}
diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
index dbf4f5b..325668f 100644
--- a/services/keep-web/doc.go
+++ b/services/keep-web/doc.go
@@ -1,28 +1,158 @@
 // Keep-web provides read-only HTTP access to files stored in Keep. It
 // serves public data to anonymous and unauthenticated clients, and
-// accepts authentication via Arvados tokens. It can be installed
-// anywhere with access to Keep services, typically behind a web proxy
-// that provides SSL support.
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
 //
-// Given that this amounts to a web hosting service for arbitrary
-// content, it is vital to ensure that at least one of the following is
-// true:
+// Starting the server
 //
-// Usage
-//
-// Listening:
+// Serve HTTP requests at port 1234 on all interfaces:
 //
 //   keep-web -address=:1234
 //
-// Start an HTTP server on port 1234.
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
 //
 //   keep-web -address=1.2.3.4:1234
 //
-// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+// Proxy configuration
 //
 // Keep-web does not support SSL natively. Typically, it is installed
 // behind a proxy like nginx.
 //
+// Here is an example nginx configuration.
+//
+//	http {
+//	  upstream keep-web {
+//	    server localhost:1234;
+//	  }
+//	  server {
+//	    listen *:443 ssl;
+//	    server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+//	    ssl_certificate /root/wildcard.example.com.crt;
+//	    ssl_certificate_key /root/wildcard.example.com.key;
+//	    location  / {
+//	      proxy_pass http://keep-web;
+//	      proxy_set_header Host $host;
+//	      proxy_set_header X-Forwarded-For $remote_addr;
+//	    }
+//	  }
+//	}
+//
+// It is not necessary to run keep-web on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keep-web, so
+// intervening networks must be secured by other means.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections (i.e., collections which can be served by keep-web
+// without making use of any credentials supplied by the client). See
+// "Same-origin mode" below.
+//
+//   http://dl.example.com/c=uuid_or_pdh/path/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the upstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh.dl.example.com/path/file.txt
+//
+// The first form minimizes the cost and effort of deploying a
+// wildcard TLS certificate for *.dl.example.com. The second form is
+// likely to be easier to configure, and more efficient to run, on an
+// upstream proxy.
+//
+// In all of the above forms, the "dl.example.com" part can be
+// anything at all.
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// replaced by "-".
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+//   Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in an URL-encoded query string:
+//
+//   GET /foo.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keep-web accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the upstream proxy should be configured to
+// return 401 for all paths beginning with "/c=".
+//
+// Same-origin mode
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://dl.example.com/'') and upload it to some other site
+// chosen by the author of collection X.
+//
 package main
 
 // TODO(TC): Implement
@@ -31,7 +161,7 @@ package main
 //
 // Normally, Keep-web is installed using a wildcard DNS entry and a
 // wildcard HTTPS certificate, serving data from collection X at
-// ``https://X.dl.example.com/path/file.ext''.
+// ``https://X--dl.example.com/path/file.ext''.
 //
 // It will also serve publicly accessible data at
 // ``https://dl.example.com/collections/X/path/file.txt'', but it does not
@@ -48,10 +178,4 @@ package main
 //
 //   keep-web -trust-all-content [...]
 //
-// In the general case, this should not be enabled: A web page stored
-// in collection X can execute JavaScript code that uses the current
-// viewer's credentials to download additional data -- data which is
-// accessible to the current viewer, but not to the author of
-// collection X -- from the same origin (``https://dl.example.com/'')
-// and upload it to some other site chosen by the author of collection
-// X.
+// In the general case, this should not be enabled: see "Same-origin mode" above.
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 04af920..03b3e26 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -2,11 +2,14 @@ package main
 
 import (
 	"fmt"
+	"html"
 	"io"
 	"mime"
 	"net/http"
+	"net/url"
 	"os"
 	"strings"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
@@ -25,26 +28,49 @@ func init() {
 	anonymousTokens = []string{}
 }
 
+// Return the leading part of DNS name s (before the first "." or "--") if it is a UUID or a PDH, otherwise "".
+func parseCollectionIdFromDNSName(s string) string {
+	// Strip domain.
+	if i := strings.IndexRune(s, '.'); i >= 0 {
+		s = s[:i]
+	}
+	// Names like {uuid}--dl.example.com serve the same purpose as
+	// {uuid}.dl.example.com but can reduce cost/effort of using
+	// [additional] wildcard certificates.
+	if i := strings.Index(s, "--"); i >= 0 {
+		s = s[:i]
+	}
+	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
+		return ""
+	}
+	return s
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-	var statusCode int
+	var statusCode = 0
 	var statusText string
 
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
-		if statusCode > 0 {
-			if w.WroteStatus() == 0 {
-				w.WriteHeader(statusCode)
-			} else {
-				httpserver.Log(r.RemoteAddr, "WARNING",
-					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-			}
+		if statusCode == 0 {
+			statusCode = w.WroteStatus()
+		} else if w.WroteStatus() == 0 {
+			w.WriteHeader(statusCode)
+		} else if w.WroteStatus() != statusCode {
+			httpserver.Log(r.RemoteAddr, "WARNING",
+				fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
 		}
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
+	if r.Method != "GET" && r.Method != "POST" {
+		statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+		return
+	}
+
 	arv := clientPool.Get()
 	if arv == nil {
 		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
@@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
 	pathParts := strings.Split(r.URL.Path[1:], "/")
 
-	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
-		statusCode = http.StatusNotFound
-		return
-	}
-
 	var targetId string
 	var targetPath []string
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+		// "http://{id}.domain.example.com/{path}" form
+		if t := r.FormValue("api_token"); t != "" {
+			// ...with explicit token in query string or
+			// form in POST body. We must encrypt the
+			// token such that it can only be used for
+			// this collection; put it in an HttpOnly
+			// cookie; and redirect to the same URL with
+			// the query param redacted, and method =
+			// GET.
+			//
+			// The HttpOnly flag is necessary to prevent
+			// JavaScript code (included in, or loaded by,
+			// a page in the collection being served) from
+			// employing the user's token beyond reading
+			// other files in the same domain, i.e., the
+			// same collection.
+			//
+			// The 303 redirect is necessary in the case
+			// of a GET request to avoid exposing the
+			// token in the Location bar, and in the case
+			// of a POST request to avoid raising warnings
+			// when the user refreshes the resulting page.
+			http.SetCookie(w, &http.Cookie{
+				Name:    "api_token",
+				Value:   auth.EncodeTokenCookie([]byte(t)),
+				Path:    "/",
+				Expires: time.Now().AddDate(10,0,0),
+			})
+			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+			w.Header().Add("Location", redir)
+			statusCode, statusText = http.StatusSeeOther, redir
+			w.WriteHeader(statusCode)
+			io.WriteString(w, `<A href="`)
+			io.WriteString(w, html.EscapeString(redir))
+			io.WriteString(w, `">Continue</A>`)
+			return
+		} else if strings.HasPrefix(pathParts[0], "t=") {
+			// ...with explicit token in path,
+			// "{...}.com/t={token}/{path}".  This form
+			// must only be used to pass scoped tokens
+			// that give permission for a single
+			// collection. See FormValue case above.
+			tokens = []string{pathParts[0][2:]}
+			targetPath = pathParts[1:]
+			pathToken = true
+		} else {
+			// ...with cookie, Authorization header, or
+			// no token at all
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+			tokens = append(reqTokens, anonymousTokens...)
+			targetPath = pathParts
+		}
+	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
 		// "/collections/download/{id}/{token}/path..." form:
 		// Don't use our configured anonymous tokens,
 		// Authorization headers, etc.  Just use the token in
@@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	found := false
 	for _, arv.ApiToken = range tokens {
 		err := arv.Get("collections", targetId, nil, &collection)
-		httpserver.Log(err)
 		if err == nil {
 			// Success
 			found = true
@@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		// someone trying (anonymously) to download public
 		// data that has been deleted.  Allow a referrer to
 		// provide this context somehow?
-		statusCode = http.StatusUnauthorized
 		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		statusCode = http.StatusUnauthorized
 		return
 	}
 
@@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
new file mode 100644
index 0000000..a1f5e1a
--- /dev/null
+++ b/services/keep-web/handler_test.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+	"html"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct {}
+
+func mustParseURL(s string) *url.URL {
+	r, err := url.Parse(s)
+	if err != nil {
+		panic("parse URL: " + s)
+	}
+	return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+	for _, testURL := range []string{
+		arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+		arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+	} {
+		resp := httptest.NewRecorder()
+		req := &http.Request{
+			Method: "GET",
+			URL: mustParseURL(testURL),
+		}
+		(&handler{}).ServeHTTP(resp, req)
+		c.Check(resp.Code, check.Equals, http.StatusNotFound)
+		c.Check(resp.Body.String(), check.Equals, "")
+	}
+}
+
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+	doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+	r.Header.Add("Authorization", "OAuth2 " + tok)
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+	doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int {
+	r.AddCookie(&http.Cookie{
+		Name: "api_token",
+		Value: auth.EncodeTokenCookie([]byte(tok)),
+	})
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+	doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int {
+	r.URL.Path = "/t=" + tok + r.URL.Path
+	return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+	doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int {
+	r.URL.RawQuery = "api_token=" + tok
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+	doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int {
+	r.Method = "POST"
+	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	r.Body = ioutil.NopCloser(strings.NewReader(
+		url.Values{"api_token": {tok}}.Encode()))
+	return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, tok := range []string{
+		arvadostest.ActiveToken,
+		arvadostest.ActiveToken[:15],
+		arvadostest.SpectatorToken,
+		"bogus",
+		"",
+	} {
+		u := mustParseURL("http://" + hostPath)
+		req := &http.Request{
+			Method: "GET",
+			Host: u.Host,
+			URL: u,
+			Header: http.Header{},
+		}
+		failCode := authz(req, tok)
+		resp := doReq(req)
+		code, body := resp.Code, resp.Body.String()
+		if tok == arvadostest.ActiveToken {
+			c.Check(code, check.Equals, http.StatusOK)
+			c.Check(body, check.Equals, "foo")
+		} else {
+			c.Check(code >= 400, check.Equals, true)
+			c.Check(code < 500, check.Equals, true)
+			if tok == arvadostest.SpectatorToken {
+				// Valid token never offers to retry
+				// with different credentials.
+				c.Check(code, check.Equals, http.StatusNotFound)
+			} else {
+				// Invalid token can ask to retry
+				// depending on the authz method.
+				c.Check(code, check.Equals, failCode)
+			}
+			c.Check(body, check.Equals, "")
+		}
+	}
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	if resp.Code != http.StatusSeeOther {
+		return resp
+	}
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+	u, _ := req.URL.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+	return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		arvadostest.FooCollection + ".example.com/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+		http.StatusNotFound,
+	)
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+	u, _ := url.Parse(`http://` + hostPath + queryString)
+	req := &http.Request{
+		Method: method,
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{"Content-Type": {contentType}},
+		Body: ioutil.NopCloser(strings.NewReader(body)),
+	}
+
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+	u, _ = u.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+
+	resp = httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Check(resp.Header().Get("Location"), check.Equals, "")
+	c.Check(resp.Code, check.Equals, expectStatus)
+	if expectStatus == http.StatusOK {
+		c.Check(resp.Body.String(), check.Equals, "foo")
+	}
+}

commit d1289b1021929fbbba35027f70a33e05cb594bb3
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 24 23:33:08 2015 -0400

    5824: add (*KeepClient)CollectionFileReader()

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
new file mode 100644
index 0000000..87b28f8
--- /dev/null
+++ b/sdk/go/arvadostest/fixtures.go
@@ -0,0 +1,17 @@
+package arvadostest
+
+const (
+	SpectatorToken        = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	ActiveToken           = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	AnonymousToken        = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
+	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
+		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+		"./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+		". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
+)
diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
new file mode 100644
index 0000000..5db944c
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader.go
@@ -0,0 +1,159 @@
+package keepclient
+
+import (
+	"errors"
+	"io"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+// ErrNoManifest indicates the given collection has no manifest
+// information (e.g., manifest_text was excluded by a "select"
+// parameter when retrieving the collection record).
+var ErrNoManifest = errors.New("Collection has no manifest")
+
+// CollectionFileReader returns an io.Reader that reads file content
+// from a collection. The filename must be given relative to the root
+// of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (*cfReader, error) {
+	mText, ok := collection["manifest_text"].(string)
+	if !ok {
+		return nil, ErrNoManifest
+	}
+	m := manifest.Manifest{Text: mText}
+	rdrChan := make(chan *cfReader)
+	go func() {
+		// q is a queue of FileSegments that we have received but
+		// haven't yet been able to send to toGet.
+		var q []*manifest.FileSegment
+		var r *cfReader
+		for seg := range m.FileSegmentIterByName(filename) {
+			if r == nil {
+				// We've just discovered that the
+				// requested filename does appear in
+				// the manifest, so we can return a
+				// real reader (not nil) from
+				// CollectionFileReader().
+				r = newCFReader(kc)
+				rdrChan <- r
+			}
+			q = append(q, seg)
+			r.totalSize += uint64(seg.Len)
+			// Send toGet whatever it's ready to receive.
+			Q: for len(q) > 0 {
+				select {
+				case r.toGet <- q[0]:
+					q = q[1:]
+				default:
+					break Q
+				}
+			}
+		}
+		if r == nil {
+			// File not found
+			rdrChan <- nil
+			return
+		}
+		close(r.countDone)
+		for _, seg := range q {
+			r.toGet <- seg
+		}
+		close(r.toGet)
+	}()
+	// Before returning a reader, wait until we know whether the
+	// file exists here:
+	r := <-rdrChan
+	if r == nil {
+		return nil, os.ErrNotExist
+	}
+	return r, nil
+}
+
+type cfReader struct {
+	keepClient *KeepClient
+	// doGet() reads FileSegments from toGet, gets the data from
+	// Keep, and sends byte slices to toRead to be consumed by
+	// Read().
+	toGet        chan *manifest.FileSegment
+	toRead       chan []byte
+	// bytes ready to send next time someone calls Read()
+	buf          []byte
+	// Total size of the file being read. Not safe to read this
+	// until countDone is closed.
+	totalSize    uint64
+	countDone    chan struct{}
+	// First error encountered.
+	err          error
+}
+
+func (r *cfReader) Read(outbuf []byte) (n int, err error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for r.buf == nil || len(r.buf) == 0 {
+		var ok bool
+		r.buf, ok = <-r.toRead
+		if r.err != nil {
+			return 0, r.err
+		} else if !ok {
+			return 0, io.EOF
+		}
+	}
+	if len(r.buf) > len(outbuf) {
+		n = len(outbuf)
+	} else {
+		n = len(r.buf)
+	}
+	copy(outbuf[:n], r.buf[:n])
+	r.buf = r.buf[n:]
+	return
+}
+
+func (r *cfReader) Close() error {
+	_, _ = <-r.countDone
+	for _ = range r.toGet {
+	}
+	for _ = range r.toRead {
+	}
+	return r.err
+}
+
+func (r *cfReader) Len() uint64 {
+	// Wait for all segments to be counted
+	_, _ = <-r.countDone
+	return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+	defer close(r.toRead)
+	for fs := range r.toGet {
+		rdr, _, _, err := r.keepClient.Get(fs.Locator)
+		if err != nil {
+			r.err = err
+			return
+		}
+		var buf = make([]byte, fs.Offset+fs.Len)
+		_, err = io.ReadFull(rdr, buf)
+		if err != nil {
+			r.err = err
+			return
+		}
+		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+			if bOff+bLen > fs.Offset+fs.Len {
+				bLen = fs.Offset + fs.Len - bOff
+			}
+			r.toRead <- buf[bOff : bOff+bLen]
+		}
+	}
+}
+
+func newCFReader(kc *KeepClient) (r *cfReader) {
+	r = new(cfReader)
+	r.keepClient = kc
+	r.toGet = make(chan *manifest.FileSegment, 2)
+	r.toRead = make(chan []byte)
+	r.countDone = make(chan struct{})
+	go r.doGet()
+	return
+}
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
new file mode 100644
index 0000000..f271208
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -0,0 +1,123 @@
+package keepclient
+
+import (
+	"crypto/md5"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server
+type IntegrationSuite struct{}
+
+type SuccessHandler struct {
+	disk map[string][]byte
+	lock chan struct{}
+}
+
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	switch req.Method {
+	case "PUT":
+		buf, err := ioutil.ReadAll(req.Body)
+		if err != nil {
+			resp.WriteHeader(500)
+			return
+		}
+		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+		h.lock <- struct{}{}
+		h.disk[pdh] = buf
+		<- h.lock
+		resp.Write([]byte(pdh))
+	case "GET":
+		pdh := req.URL.Path[1:]
+		h.lock <- struct{}{}
+		buf, ok := h.disk[pdh]
+		<- h.lock
+		if !ok {
+			resp.WriteHeader(http.StatusNotFound)
+		} else {
+			resp.Write(buf)
+		}
+	default:
+		resp.WriteHeader(http.StatusMethodNotAllowed)
+	}
+}
+
+type rdrTest struct {
+	mt   string      // manifest text
+	f    string      // filename
+	want interface{} // error or string to expect
+}
+
+func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	{
+		localRoots := make(map[string]string)
+		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
+		for i, k := range RunSomeFakeKeepServers(h, 4) {
+			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		}
+		kc.SetServiceRoots(localRoots, localRoots, nil)
+		kc.PutB([]byte("foo"))
+		kc.PutB([]byte("bar"))
+		kc.PutB([]byte("Hello world\n"))
+		kc.PutB([]byte(""))
+	}
+
+	mt := arvadostest.PathologicalManifest
+
+	for _, testCase := range []rdrTest{
+		{mt: mt, f: "zzzz", want: os.ErrNotExist},
+		{mt: mt, f: "frob", want: os.ErrNotExist},
+		{mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "/f", want: os.ErrNotExist},
+		{mt: mt, f: "./f", want: os.ErrNotExist},
+		{mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+		{mt: mt, f: "foo/zero", want: ""},
+		{mt: mt, f: "zero@0", want: ""},
+		{mt: mt, f: "zero@1", want: ""},
+		{mt: mt, f: "zero@4", want: ""},
+		{mt: mt, f: "zero@9", want: ""},
+		{mt: mt, f: "f", want: "f"},
+		{mt: mt, f: "ooba", want: "ooba"},
+		{mt: mt, f: "overlapReverse/o", want: "o"},
+		{mt: mt, f: "overlapReverse/oo", want: "oo"},
+		{mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+		{mt: mt, f: "foo bar/baz", want: "foo"},
+		{mt: mt, f: "segmented/frob", want: "frob"},
+		{mt: mt, f: "segmented/oof", want: "oof"},
+	} {
+		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		switch want := testCase.want.(type) {
+		case error:
+			c.Check(rdr, check.IsNil)
+			c.Check(err, check.Equals, want)
+		case string:
+			buf := make([]byte, len(want))
+			n, err := io.ReadFull(rdr, buf)
+			c.Check(err, check.IsNil)
+			for i := 0; i < 4; i++ {
+				c.Check(string(buf), check.Equals, want)
+				n, err = rdr.Read(buf)
+				c.Check(n, check.Equals, 0)
+				c.Check(err, check.Equals, io.EOF)
+			}
+			c.Check(rdr.Close(), check.Equals, nil)
+		}
+	}
+}
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index 4e816cd..f104d9a 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -5,25 +5,185 @@
 package manifest
 
 import (
+	"errors"
+	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"log"
+	"regexp"
+	"strconv"
 	"strings"
 )
 
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+	"^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
 type Manifest struct {
 	Text string
 }
 
+type BlockLocator struct {
+	Digest blockdigest.BlockDigest
+	Size   int
+	Hints  []string
+}
+
+type DataSegment struct {
+	BlockLocator
+	Locator      string
+	StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+	Locator string
+	// Offset (within this block) of this data segment
+	Offset int
+	Len    int
+}
+
 // Represents a single line from a manifest.
 type ManifestStream struct {
 	StreamName string
 	Blocks     []string
-	Files      []string
+	FileTokens []string
+}
+
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+func unescapeSeq(seq string) string {
+	if seq == `\\` {
+		return `\`
+	}
+	i, err := strconv.ParseUint(seq[1:], 8, 8)
+	if err != nil {
+		// Invalid escape sequence: can't unescape.
+		return seq
+	}
+	return string([]byte{byte(i)})
+}
+
+func UnescapeName(s string) string {
+	return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+	if !LocatorPattern.MatchString(s) {
+		err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+			"\"%s\".",
+			s,
+			LocatorPattern.String())
+	} else {
+		tokens := strings.Split(s, "+")
+		var blockSize int64
+		var blockDigest blockdigest.BlockDigest
+		// We expect both of the following to succeed since LocatorPattern
+		// restricts the strings appropriately.
+		blockDigest, err = blockdigest.FromString(tokens[0])
+		if err != nil {
+			return
+		}
+		blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+		if err != nil {
+			return
+		}
+		b.Digest = blockDigest
+		b.Size = int(blockSize)
+		b.Hints = tokens[2:]
+	}
+	return
+}
+
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+	parts := strings.SplitN(tok, ":", 3)
+	if len(parts) != 3 {
+		err = ErrInvalidToken
+		return
+	}
+	segPos, err = strconv.ParseUint(parts[0], 10, 64)
+	if err != nil {
+		return
+	}
+	segLen, err = strconv.ParseUint(parts[1], 10, 64)
+	if err != nil {
+		return
+	}
+	name = UnescapeName(parts[2])
+	return
+}
+
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		s.sendFileSegmentIterByName(filepath, ch)
+		close(ch)
+	}()
+	return ch
+}
+
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+	blockLens := make([]int, 0, len(s.Blocks))
+	// This is what streamName+"/"+fileName will look like:
+	target := "./" + filepath
+	for _, fTok := range s.FileTokens {
+		wantPos, wantLen, name, err := parseFileToken(fTok)
+		if err != nil {
+			// Skip (!) invalid file tokens.
+			continue
+		}
+		if s.StreamName+"/"+name != target {
+			continue
+		}
+		if wantLen == 0 {
+			ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+			continue
+		}
+		// Linear search for blocks containing data for this
+		// file
+		var blockPos uint64 = 0 // position of block in stream
+		for i, loc := range s.Blocks {
+			if blockPos >= wantPos+wantLen {
+				break
+			}
+			if len(blockLens) <= i {
+				blockLens = blockLens[:i+1]
+				b, err := ParseBlockLocator(loc)
+				if err != nil {
+					// Unparseable locator -> unusable
+					// stream.
+					ch <- nil
+					return
+				}
+				blockLens[i] = b.Size
+			}
+			blockLen := uint64(blockLens[i])
+			if blockPos+blockLen <= wantPos {
+				blockPos += blockLen
+				continue
+			}
+			fseg := FileSegment{
+				Locator: loc,
+				Offset:  0,
+				Len:     blockLens[i],
+			}
+			if blockPos < wantPos {
+				fseg.Offset = int(wantPos - blockPos)
+				fseg.Len -= fseg.Offset
+			}
+			if blockPos+blockLen > wantPos+wantLen {
+				fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+			}
+			ch <- &fseg
+			blockPos += blockLen
+		}
+	}
 }
 
 func parseManifestStream(s string) (m ManifestStream) {
 	tokens := strings.Split(s, " ")
-	m.StreamName = tokens[0]
+	m.StreamName = UnescapeName(tokens[0])
 	tokens = tokens[1:]
 	var i int
 	for i = range tokens {
@@ -32,7 +192,7 @@ func parseManifestStream(s string) (m ManifestStream) {
 		}
 	}
 	m.Blocks = tokens[:i]
-	m.Files = tokens[i:]
+	m.FileTokens = tokens[i:]
 	return
 }
 
@@ -58,6 +218,20 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 	return ch
 }
 
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		for stream := range m.StreamIter() {
+			if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+				continue
+			}
+			stream.sendFileSegmentIterByName(filepath, ch)
+		}
+		close(ch)
+	}()
+	return ch
+}
+
 // Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
diff --git a/sdk/go/manifest/manifest_test.go b/sdk/go/manifest/manifest_test.go
index 8cfe3d9..364648d 100644
--- a/sdk/go/manifest/manifest_test.go
+++ b/sdk/go/manifest/manifest_test.go
@@ -1,10 +1,13 @@
 package manifest
 
 import (
-	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"io/ioutil"
+	"reflect"
 	"runtime"
 	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
 func getStackTrace() string {
@@ -60,7 +63,7 @@ func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
 	expectEqual(t, actual.StreamName, expected.StreamName)
 	expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
-	expectStringSlicesEqual(t, actual.Files, expected.Files)
+	expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
 }
 
 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
@@ -72,8 +75,19 @@ func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected
 func TestParseManifestStreamSimple(t *testing.T) {
 	m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
 	expectManifestStream(t, m, ManifestStream{StreamName: ".",
-		Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-		Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+		Blocks:     []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+		FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+	b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+	if err != nil {
+		t.Fatalf("Unexpected error parsing block locator: %v", err)
+	}
+	expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
+		Size: 2310,
+		Hints: []string{"K@qr1hi",
+			"Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -88,8 +102,8 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
 	expectManifestStream(t,
 		firstStream,
 		ManifestStream{StreamName: ".",
-			Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
-			Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+			Blocks:     []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
+			FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
 	received, ok := <-streamIter
 	if ok {
@@ -126,3 +140,58 @@ func TestBlockIterLongManifest(t *testing.T) {
 			Size:  31367794,
 			Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
 }
+
+func TestUnescape(t *testing.T) {
+	for _, testCase := range [][]string{
+		{`\040`, ` `},
+		{`\009`, `\009`},
+		{`\\\040\\`, `\ \`},
+		{`\\040\`, `\040\`},
+	} {
+		in := testCase[0]
+		expect := testCase[1]
+		got := UnescapeName(in)
+		if expect != got {
+			t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+		}
+	}
+}
+
+type fsegtest struct {
+	mt   string        // manifest text
+	f    string        // filename
+	want []FileSegment // segments should be received on channel
+}
+
+func TestFileSegmentIterByName(t *testing.T) {
+	mt := arvadostest.PathologicalManifest
+	for _, testCase := range []fsegtest{
+		{mt: mt, f: "zzzz", want: nil},
+		// This case is too sensitive: it would be acceptable
+		// (even preferable) to return only one empty segment.
+		{mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero@9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+		{mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+		{mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+		{mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+		{mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		{mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		// This case is too sensitive: it would be better to
+		// omit the empty segment.
+		{mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+		{mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+	} {
+		m := Manifest{Text: testCase.mt}
+		var got []FileSegment
+		for fs := range m.FileSegmentIterByName(testCase.f) {
+			got = append(got, *fs)
+		}
+		if !reflect.DeepEqual(got, testCase.want) {
+			t.Errorf("For %#v:\n got  %#v\n want %#v", testCase.f, got, testCase.want)
+		}
+	}
+}
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 48e3640..04af920 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -11,6 +11,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
 var clientPool = arvadosclient.MakeClientPool()
@@ -136,17 +137,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	}
 
 	filename := strings.Join(targetPath, "/")
-	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	kc, err := keepclient.MakeKeepClient(arv)
+	if err != nil {
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	rdr, err := kc.CollectionFileReader(collection, filename)
 	if os.IsNotExist(err) {
 		statusCode = http.StatusNotFound
 		return
-	} else if err == arvadosclient.ErrNotImplemented {
-		statusCode = http.StatusNotImplemented
-		return
 	} else if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+	defer rdr.Close()
 
 	// One or both of these can be -1 if not found:
 	basenamePos := strings.LastIndex(filename, "/")
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 66c6812..b4d6d17 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -15,16 +15,7 @@ import (
 
 var _ = check.Suite(&IntegrationSuite{})
 
-const (
-	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
-	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
-	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
-	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
-	bogusCollection = "zzzzz-4zz18-totallynotexist"
-	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
-)
-
-// IntegrationSuite tests need an API server and an arv-git-httpd server
+// IntegrationSuite tests need an API server and a keep-web server
 type IntegrationSuite struct {
 	testServer *server
 }
@@ -34,12 +25,12 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
@@ -62,46 +53,46 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/download",
 		"/collections",
 		"/collections/",
-		"/collections/" + fooCollection,
-		"/collections/" + fooCollection + "/",
+		"/collections/" + arvadostest.FooCollection,
+		"/collections/" + arvadostest.FooCollection + "/",
 		// Non-existent file in collection
-		"/collections/" + fooCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 		// Non-existent collection
-		"/collections/" + bogusCollection,
-		"/collections/" + bogusCollection + "/",
-		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.NonexistentCollection,
+		"/collections/" + arvadostest.NonexistentCollection + "/",
+		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, activeToken, uri)
+		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
 func (s *IntegrationSuite) Test200(c *check.C) {
-	anonymousTokens = []string{anonymousToken}
+	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.Equals, nil)
-	arv.ApiToken = activeToken
+	arv.ApiToken = arvadostest.ActiveToken
 	kc, err := keepclient.MakeKeepClient(&arv)
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
 	for _, spec := range [][]string{
 		// My collection
-		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		// Anonymously accessible user agreement. These should
 		// start working when CollectionFileReader provides
 		// real data instead of fake/stub data.
-		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
 		hdr, body := s.runCurl(c, spec[0], spec[1])
 		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {

commit d37ebe600d3984c821386c347f33c59ef3355e03
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:09:02 2015 -0400

    5824: Add doc.go

diff --git a/services/keep-web/doc.go b/services/keep-web/doc.go
new file mode 100644
index 0000000..dbf4f5b
--- /dev/null
+++ b/services/keep-web/doc.go
@@ -0,0 +1,57 @@
+// Keep-web provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// accepts authentication via Arvados tokens. It can be installed
+// anywhere with access to Keep services, typically behind a web proxy
+// that provides SSL support.
+//
+// Given that this amounts to a web hosting service for arbitrary
+// content, it is vital to ensure that at least one of the following is
+// true:
+//
+// Usage
+//
+// Listening:
+//
+//   keep-web -address=:1234
+//
+// Start an HTTP server on port 1234.
+//
+//   keep-web -address=1.2.3.4:1234
+//
+// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+//
+// Keep-web does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+package main
+
+// TODO(TC): Implement
+//
+// Trusted content
+//
+// Normally, Keep-web is installed using a wildcard DNS entry and a
+// wildcard HTTPS certificate, serving data from collection X at
+// ``https://X.dl.example.com/path/file.ext''.
+//
+// It will also serve publicly accessible data at
+// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
+// accept any kind of credentials at paths like these.
+//
+// In "trust all content" mode, Keep-web will accept credentials (API
+// tokens) and serve any collection X at
+// "https://dl.example.com/collections/X/path/file.ext".  This is
+// UNSAFE except in the special case where everyone who is able to write
+// ANY data to Keep, and every JavaScript and HTML file written to
+// Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+//   keep-web -trust-all-content [...]
+//
+// In the general case, this should not be enabled: A web page stored
+// in collection X can execute JavaScript code that uses the current
+// viewer's credentials to download additional data -- data which is
+// accessible to the current viewer, but not to the author of
+// collection X -- from the same origin (``https://dl.example.com/'')
+// and upload it to some other site chosen by the author of collection
+// X.

commit ff7759fbe3aab8f814a41437cd6e97b80fe56d8d
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 23 19:12:58 2015 -0400

    5824: Add install doc

diff --git a/doc/install/install-keep-web.html.textile.liquid b/doc/install/install-keep-web.html.textile.liquid
new file mode 100644
index 0000000..4777668
--- /dev/null
+++ b/doc/install/install-keep-web.html.textile.liquid
@@ -0,0 +1,64 @@
+---
+layout: default
+navsection: installguide
+title: Install download server
+...
+
+This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
+
+The keep-web server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+
+By convention, we use the following hostname for the download service:
+
+<div class="offset1">
+table(table table-bordered table-condensed).
+|dl.@uuid_prefix@.your.domain|
+</div>
+
+This hostname should resolve from anywhere on the internet.
+
+h2. Install keep-web
+
+First add the Arvados apt repository, and then install the keep-web package.
+
+<notextile>
+<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
+~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get install keep-web</span>
+</code></pre>
+</notextile>
+
+Verify that @keep-web@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keep-web -h</span>
+Usage of keep-web:
+  -address="0.0.0.0:80": Address to listen on, "host:port".
+</code></pre>
+</notextile>
+
+We recommend running @keep-web@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+
+Your @run@ script should look something like this:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+exec sudo -u nobody keep-web -address=:9002 2>&1
+</code></pre>
+</notextile>
+
+h3. Set up a reverse proxy with SSL support
+
+The keep-web service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keep-web, running on port 443 and passing requests to keep-web on port 9002 (or whatever port you chose in your run script).
+
+h3. Tell the API server about the keep-web service
+
+In your API server's config/application.yml file, add the following entry:
+
+<notextile>
+<pre><code>keep-web: dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>

commit fc7c9e1ad6345888595205c0978c58a6710d7446
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 17 02:47:49 2015 -0400

    5824: Assign MIME type by file extension. closes #6327

diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index bbcd53c..48e3640 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -3,6 +3,7 @@ package main
 import (
 	"fmt"
 	"io"
+	"mime"
 	"net/http"
 	"os"
 	"strings"
@@ -146,6 +147,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+
+	// One or both of these can be -1 if not found:
+	basenamePos := strings.LastIndex(filename, "/")
+	extPos := strings.LastIndex(filename, ".")
+	if extPos > basenamePos {
+		// Now extPos is safely >= 0.
+		if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+			w.Header().Set("Content-Type", t)
+		}
+	}
+
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index 1c36f98..66c6812 100644
--- a/services/keep-web/server_test.go
+++ b/services/keep-web/server_test.go
@@ -109,6 +109,12 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 			continue
 		}
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		if strings.HasSuffix(spec[1], ".txt") {
+			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+			// TODO: Check some types that aren't
+			// automatically detected by Go's http server
+			// by sniffing the content.
+		}
 		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
 	}
 }

commit e3778ef533f50b0492eef80bb2525a7a09628c32
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 00:02:11 2015 -0400

    5824: Add keepdl.

diff --git a/services/keep-web/.gitignore b/services/keep-web/.gitignore
new file mode 100644
index 0000000..173e306
--- /dev/null
+++ b/services/keep-web/.gitignore
@@ -0,0 +1 @@
+keepdl
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
new file mode 100644
index 0000000..bbcd53c
--- /dev/null
+++ b/services/keep-web/handler.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var clientPool = arvadosclient.MakeClientPool()
+
+var anonymousTokens []string
+
+type handler struct{}
+
+func init() {
+	// TODO(TC): Get anonymousTokens from flags
+	anonymousTokens = []string{}
+}
+
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+	var statusCode int
+	var statusText string
+
+	w := httpserver.WrapResponseWriter(wOrig)
+	defer func() {
+		if statusCode > 0 {
+			if w.WroteStatus() == 0 {
+				w.WriteHeader(statusCode)
+			} else {
+				httpserver.Log(r.RemoteAddr, "WARNING",
+					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+			}
+		}
+		if statusText == "" {
+			statusText = http.StatusText(statusCode)
+		}
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+	}()
+
+	arv := clientPool.Get()
+	if arv == nil {
+		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+		return
+	}
+	defer clientPool.Put(arv)
+
+	pathParts := strings.Split(r.URL.Path[1:], "/")
+
+	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	}
+
+	var targetId string
+	var targetPath []string
+	var tokens []string
+	var reqTokens []string
+	var pathToken bool
+	if len(pathParts) >= 5 && pathParts[1] == "download" {
+		// "/collections/download/{id}/{token}/path..." form:
+		// Don't use our configured anonymous tokens,
+		// Authorization headers, etc.  Just use the token in
+		// the path.
+		targetId = pathParts[2]
+		tokens = []string{pathParts[3]}
+		targetPath = pathParts[4:]
+		pathToken = true
+	} else {
+		// "/collections/{id}/path..." form
+		targetId = pathParts[1]
+		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		tokens = append(reqTokens, anonymousTokens...)
+		targetPath = pathParts[2:]
+	}
+
+	tokenResult := make(map[string]int)
+	collection := make(map[string]interface{})
+	found := false
+	for _, arv.ApiToken = range tokens {
+		err := arv.Get("collections", targetId, nil, &collection)
+		httpserver.Log(err)
+		if err == nil {
+			// Success
+			found = true
+			break
+		}
+		if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+			switch srvErr.HttpStatusCode {
+			case 404, 401:
+				// Token broken or insufficient to
+				// retrieve collection
+				tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+				continue
+			}
+		}
+		// Something more serious is wrong
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	if !found {
+		if pathToken {
+			// The URL is a "secret sharing link", but it
+			// didn't work out. Asking the client for
+			// additional credentials would just be
+			// confusing.
+			statusCode = http.StatusNotFound
+			return
+		}
+		for _, t := range reqTokens {
+			if tokenResult[t] == 404 {
+				// The client provided valid token(s), but the
+				// collection was not found.
+				statusCode = http.StatusNotFound
+				return
+			}
+		}
+		// The client's token was invalid (e.g., expired), or
+		// the client didn't even provide one.  Propagate the
+		// 401 to encourage the client to use a [different]
+		// token.
+		//
+		// TODO(TC): This response would be confusing to
+		// someone trying (anonymously) to download public
+		// data that has been deleted.  Allow a referrer to
+		// provide this context somehow?
+		statusCode = http.StatusUnauthorized
+		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		return
+	}
+
+	filename := strings.Join(targetPath, "/")
+	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	if os.IsNotExist(err) {
+		statusCode = http.StatusNotFound
+		return
+	} else if err == arvadosclient.ErrNotImplemented {
+		statusCode = http.StatusNotImplemented
+		return
+	} else if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+		return
+	}
+	_, err = io.Copy(w, rdr)
+	if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+	}
+}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
new file mode 100644
index 0000000..d780cc3
--- /dev/null
+++ b/services/keep-web/main.go
@@ -0,0 +1,28 @@
+package main
+
+import (
+	"flag"
+	"log"
+	"os"
+)
+
+func init() {
+	// MakeArvadosClient returns an error if this env var isn't
+	// available as a default token (even if we explicitly set a
+	// different token before doing anything with the client). We
+	// set this dummy value during init so it doesn't clobber the
+	// one used by "run test servers".
+	os.Setenv("ARVADOS_API_TOKEN", "xxx")
+}
+
+func main() {
+	flag.Parse()
+	srv := &server{}
+	if err := srv.Start(); err != nil {
+		log.Fatal(err)
+	}
+	log.Println("Listening at", srv.Addr)
+	if err := srv.Wait(); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
new file mode 100644
index 0000000..44da00f
--- /dev/null
+++ b/services/keep-web/server.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"flag"
+	"net/http"
+
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var address string
+
+func init() {
+	flag.StringVar(&address, "address", "0.0.0.0:80",
+		"Address to listen on, \"host:port\".")
+}
+
+type server struct {
+	httpserver.Server
+}
+
+func (srv *server) Start() error {
+	mux := http.NewServeMux()
+	mux.Handle("/", &handler{})
+	srv.Handler = mux
+	srv.Addr = address
+	return srv.Server.Start()
+}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
new file mode 100644
index 0000000..1c36f98
--- /dev/null
+++ b/services/keep-web/server_test.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+	"crypto/md5"
+	"fmt"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+const (
+	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
+	bogusCollection = "zzzzz-4zz18-totallynotexist"
+	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
+)
+
+// IntegrationSuite tests need an API server and an arv-git-httpd server
+type IntegrationSuite struct {
+	testServer *server
+}
+
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+	for _, token := range []string{
+		"",
+		"bogustoken",
+	} {
+		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(body, check.Equals, "")
+
+		if token != "" {
+			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+			c.Check(body, check.Equals, "")
+		}
+
+		hdr, body = s.runCurl(c, token, "/bad-route")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+		c.Check(body, check.Equals, "")
+	}
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+	for _, uri := range []string{
+		// Routing errors
+		"/",
+		"/foo",
+		"/download",
+		"/collections",
+		"/collections/",
+		"/collections/" + fooCollection,
+		"/collections/" + fooCollection + "/",
+		// Non-existent file in collection
+		"/collections/" + fooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		// Non-existent collection
+		"/collections/" + bogusCollection,
+		"/collections/" + bogusCollection + "/",
+		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+	} {
+		hdr, body := s.runCurl(c, activeToken, uri)
+		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+		c.Check(body, check.Equals, "")
+	}
+}
+
+func (s *IntegrationSuite) Test200(c *check.C) {
+	anonymousTokens = []string{anonymousToken}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = activeToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	kc.PutB([]byte("Hello world\n"))
+	kc.PutB([]byte("foo"))
+	for _, spec := range [][]string{
+		// My collection
+		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		// Anonymously accessible user agreement. These should
+		// start working when CollectionFileReader provides
+		// real data instead of fake/stub data.
+		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+	} {
+		hdr, body := s.runCurl(c, spec[0], spec[1])
+		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
+			c.Log("Not implemented!")
+			continue
+		}
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+	}
+}
+
+// Return header block and body.
+func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+	curlArgs := []string{"--silent", "--show-error", "--include"}
+	if token != "" {
+		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+	}
+	curlArgs = append(curlArgs, args...)
+	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	// Without "-f", curl exits 0 as long as it gets a valid HTTP
+	// response from the server, even if the response status
+	// indicates that the request failed. In our test suite, we
+	// always expect a valid HTTP response, and we parse the
+	// headers ourselves. If curl exits non-zero, our testing
+	// environment is broken.
+	c.Assert(err, check.Equals, nil)
+	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	c.Assert(len(hdrsAndBody), check.Equals, 2)
+	hdr = hdrsAndBody[0]
+	body = hdrsAndBody[1]
+	return
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+	arvadostest.StartAPI()
+	arvadostest.StartKeep()
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+	arvadostest.StopKeep()
+	arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+	arvadostest.ResetEnv()
+	s.testServer = &server{}
+	var err error
+	address = "127.0.0.1:0"
+	err = s.testServer.Start()
+	c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+	var err error
+	if s.testServer != nil {
+		err = s.testServer.Close()
+	}
+	c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list