[ARVADOS] updated: 936b0f1a267b4d274062a3cf8bef4ae8454a5e8f

git at public.curoverse.com git at public.curoverse.com
Fri Aug 28 01:50:01 EDT 2015


Summary of changes:
 apps/workbench/Gemfile.lock                        |   3 -
 .../app/controllers/actions_controller.rb          |  10 +
 apps/workbench/app/controllers/users_controller.rb |  19 +-
 .../app/helpers/pipeline_instances_helper.rb       |   4 +-
 apps/workbench/app/models/arvados_base.rb          |   4 +
 apps/workbench/app/models/user.rb                  |   3 +
 .../application/_delete_object_button.html.erb     |   2 +-
 .../views/application/_show_home_button.html.erb   |   3 +
 .../app/views/application/_show_recent.html.erb    |  18 +-
 apps/workbench/app/views/layouts/body.html.erb     |  30 +-
 .../notifications/_ssh_key_notification.html.erb   |   5 +-
 .../pipeline_instances/_running_component.html.erb |  16 +-
 .../_show_components_running.html.erb              |  24 +-
 ...rent_token.html.erb => _current_token.html.erb} |   5 +-
 .../app/views/users/_manage_account.html.erb       |  22 -
 ...epositories.html.erb => _repositories.html.erb} |   6 +-
 ...manage_ssh_keys.html.erb => _ssh_keys.html.erb} |   9 +-
 ...achines.html.erb => _virtual_machines.html.erb} |  14 +-
 .../app/views/users/current_token.html.erb         |   1 +
 .../app/views/users/manage_account.html.erb        |   1 -
 .../app/views/users/repositories.html.erb          |   2 +
 apps/workbench/app/views/users/ssh_keys.html.erb   |   2 +
 .../app/views/users/virtual_machines.html.erb      |   1 +
 .../app/views/virtual_machines/webshell.html.erb   |   4 +-
 apps/workbench/config/routes.rb                    |   5 +-
 .../test/controllers/actions_controller_test.rb    |  39 ++
 .../controllers/application_controller_test.rb     |  41 ++
 .../test/controllers/projects_controller_test.rb   |  10 +
 .../test/controllers/users_controller_test.rb      |   4 +-
 .../test/integration/application_layout_test.rb    |  14 +-
 .../test/integration/pipeline_instances_test.rb    |   2 +-
 .../test/integration/user_profile_test.rb          |   5 +-
 ..._account_test.rb => user_settings_menu_test.rb} |  88 +++-
 backports/python-llfuse/fpm-info.sh                |  10 +
 backports/python-pycurl/fpm-info.sh                |   8 +
 doc/_config.yml                                    |  10 +-
 doc/_includes/_arv_copy_expectations.liquid        |   5 +-
 doc/_includes/_install_git_curl.liquid             |  13 +
 doc/_includes/_install_postgres.liquid             |  22 +
 .../_install_ruby_and_bundler.liquid}              |  19 +-
 doc/_includes/_ssh_addkey.liquid                   |   4 +-
 ...opy_pipeline_from_curoverse.html.textile.liquid |  63 +++
 .../create-standard-objects.html.textile.liquid    |  31 +-
 doc/install/install-api-server.html.textile.liquid | 183 +++++---
 .../install-arv-git-httpd.html.textile.liquid      | 325 +++++++++++++-
 .../install-compute-node.html.textile.liquid       |  24 +-
 .../install-crunch-dispatch.html.textile.liquid    | 121 +++--
 doc/install/install-docker.html.textile.liquid     |   6 +-
 doc/install/install-keepdl.html.textile.liquid     |  58 ++-
 doc/install/install-keepproxy.html.textile.liquid  |  24 +-
 doc/install/install-keepstore.html.textile.liquid  |   2 +-
 ...nstall-manual-prerequisites.html.textile.liquid |  28 +-
 .../install-shell-server.html.textile.liquid       |  16 +-
 doc/install/install-sso.html.textile.liquid        | 299 ++++++++-----
 .../install-workbench-app.html.textile.liquid      |  39 +-
 doc/sdk/cli/install.html.textile.liquid            |   8 +-
 doc/sdk/cli/subcommands.html.textile.liquid        |   2 +-
 doc/sdk/perl/index.html.textile.liquid             |   2 +-
 .../vm-login-with-webshell.html.textile.liquid     |   4 +-
 doc/user/reference/api-tokens.html.textile.liquid  |   4 +-
 doc/user/topics/arv-copy.html.textile.liquid       |   2 +-
 doc/user/topics/arv-run.html.textile.liquid        |   2 +-
 .../add-new-repository.html.textile.liquid         |   6 +-
 .../running-external-program.html.textile.liquid   |  27 +-
 .../tutorial-submit-job.html.textile.liquid        |   6 +-
 docker/api/Dockerfile                              |   4 +-
 docker/api/setup-gitolite.sh.in                    |   2 +-
 sdk/cli/bin/crunch-job                             |  39 +-
 sdk/cli/test/binstub_clean_fail/mount              |   3 +
 sdk/cli/test/binstub_docker_noop/docker.io         |   2 +
 sdk/cli/test/binstub_sanity_check/docker.io        |   2 +
 sdk/cli/test/binstub_sanity_check/true             |   2 +
 sdk/cli/test/test_arv-collection-create.rb         |   2 -
 sdk/cli/test/test_arv-get.rb                       |  42 +-
 sdk/cli/test/test_arv-put.rb                       |  26 --
 sdk/cli/test/test_arv-run-pipeline-instance.rb     |   2 -
 sdk/cli/test/test_arv-tag.rb                       |   2 +-
 sdk/cli/test/test_crunch-job.rb                    | 126 ++++++
 sdk/go/arvadosclient/arvadosclient.go              | 145 +++---
 sdk/go/arvadosclient/arvadosclient_test.go         |  83 ++++
 sdk/go/arvadosclient/pool.go                       |  13 +
 sdk/go/auth/auth.go                                |   2 -
 sdk/go/keepclient/collectionreader.go              | 199 +++++----
 sdk/go/keepclient/collectionreader_test.go         |  84 +++-
 sdk/go/keepclient/hashcheck.go                     |  24 +-
 sdk/go/keepclient/keepclient_test.go               |   2 +-
 sdk/pam/.dockerignore                              |   6 +
 sdk/{cwl => pam}/.gitignore                        |   0
 sdk/pam/Dockerfile                                 |  52 +++
 sdk/pam/MANIFEST.in                                |   4 +
 sdk/pam/README.rst                                 |  21 +
 sdk/pam/arvados_pam.py                             | 100 -----
 sdk/pam/arvados_pam/__init__.py                    |  51 +++
 sdk/pam/arvados_pam/auth_event.py                  |  88 ++++
 sdk/pam/debian/arvados_pam                         |  10 -
 sdk/pam/debian/shellinabox                         | 136 ------
 sdk/pam/examples/shellinabox                       |  27 ++
 sdk/pam/fpm-info.sh                                |  15 +
 sdk/{cwl => pam}/gittaggers.py                     |   0
 .../pam/integration_tests}/__init__.py             |   0
 sdk/pam/integration_tests/test_pam.py              |  26 ++
 sdk/pam/lib/libpam_arvados.py                      |   1 +
 sdk/pam/pam-configs/arvados                        |  14 +
 sdk/pam/setup.py                                   |  46 ++
 .../performance => sdk/pam/tests}/__init__.py      |   0
 sdk/pam/tests/integration_test.pl                  |  46 ++
 sdk/pam/tests/mocker.py                            |  59 +++
 sdk/pam/tests/test_auth_event.py                   |  95 ++++
 sdk/pam/tests/test_pam_sm.py                       |  26 ++
 sdk/perl/Makefile.PL                               |   7 +-
 sdk/python/arvados/arvfile.py                      |   4 +-
 sdk/python/arvados/commands/arv_copy.py            |  51 ++-
 sdk/python/arvados/commands/put.py                 |  19 +-
 sdk/python/arvados/commands/run.py                 |   9 +-
 sdk/python/arvados/events.py                       |  60 ++-
 sdk/python/arvados/keep.py                         |   5 +-
 sdk/python/arvados/retry.py                        |   3 +-
 sdk/python/bin/arv-get                             |  73 +--
 sdk/python/fpm-info.sh                             |   8 +
 sdk/python/tests/run_test_server.py                |  12 +-
 sdk/python/tests/test_collections.py               |   2 +-
 sdk/python/tests/test_retry.py                     |   6 +-
 sdk/python/tests/test_websockets.py                | 136 +++---
 .../arvados/v1/repositories_controller.rb          | 136 ++++--
 services/api/app/models/arvados_model.rb           |  11 +-
 services/api/app/models/authorized_key.rb          |   4 +-
 services/api/app/models/collection.rb              |  18 +-
 services/api/app/models/commit.rb                  |  32 +-
 .../views/user_notifier/account_is_setup.text.erb  |   4 +-
 services/api/config/application.default.yml        | 488 +++++++++++----------
 services/api/config/application.yml.example        |  31 +-
 .../{database.yml.sample => database.yml.example}  |   0
 .../initializers/hardcoded_api_tokens.rb.example   |   3 -
 services/api/lib/eventbus.rb                       |  11 +-
 .../api/script/arvados-git-sync.rb                 |   7 +-
 services/api/script/crunch-dispatch.rb             |   7 +
 services/api/test/fixtures/humans.yml              |   1 +
 services/api/test/fixtures/links.yml               |  14 +
 services/api/test/fixtures/users.yml               |   1 +
 .../arvados/v1/collections_controller_test.rb      |  78 ++++
 .../arvados/v1/repositories_controller_test.rb     |  97 +++-
 .../functional/arvados/v1/users_controller_test.rb |   3 +-
 services/api/test/integration/websocket_test.rb    |  47 +-
 services/api/test/unit/authorized_key_test.rb      |  46 +-
 services/api/test/unit/collection_test.rb          |  60 +++
 services/arv-git-httpd/auth_handler.go             |  16 +-
 services/arv-git-httpd/server_test.go              |  29 ++
 services/datamanager/datamanager.go                |  44 +-
 services/datamanager/keep/keep.go                  |  84 +++-
 services/datamanager/keep/keep_test.go             |  81 ++++
 services/datamanager/summary/pull_list.go          |   6 +-
 services/datamanager/summary/pull_list_test.go     |  81 ++--
 services/datamanager/summary/trash_list.go         |  60 +++
 services/datamanager/summary/trash_list_test.go    |  76 ++++
 services/fuse/arvados_fuse/__init__.py             |  64 ++-
 services/fuse/arvados_fuse/fusedir.py              |  12 +-
 services/fuse/tests/mount_test_base.py             |   4 +-
 services/fuse/tests/test_mount.py                  |  37 +-
 services/keepdl/doc.go                             |  82 +++-
 services/keepdl/handler.go                         | 200 +++++----
 services/keepdl/handler_test.go                    |  47 +-
 services/keepdl/main.go                            |   3 +
 services/keepdl/server.go                          |   4 +-
 services/keepdl/server_test.go                     | 202 +++++++--
 services/keepproxy/keepproxy.go                    |   2 +-
 services/keepstore/bufferpool_test.go              |   5 +
 services/keepstore/handlers.go                     |  26 +-
 services/keepstore/perms_test.go                   |   2 +-
 services/keepstore/pull_worker.go                  |   1 +
 services/keepstore/pull_worker_integration_test.go |   6 +-
 services/keepstore/pull_worker_test.go             |  48 +-
 services/keepstore/status_test.go                  |  21 +
 services/keepstore/trash_worker.go                 |   9 +-
 services/keepstore/trash_worker_test.go            |  30 +-
 services/keepstore/volume_test.go                  |  21 +-
 services/keepstore/work_queue.go                   | 148 ++++---
 services/keepstore/work_queue_test.go              | 147 +++++--
 services/nodemanager/bin/arvados-node-manager      |   0
 178 files changed, 4654 insertions(+), 1856 deletions(-)
 create mode 100644 apps/workbench/app/views/application/_show_home_button.html.erb
 rename apps/workbench/app/views/users/{_manage_current_token.html.erb => _current_token.html.erb} (69%)
 delete mode 100644 apps/workbench/app/views/users/_manage_account.html.erb
 rename apps/workbench/app/views/users/{_manage_repositories.html.erb => _repositories.html.erb} (83%)
 rename apps/workbench/app/views/users/{_manage_ssh_keys.html.erb => _ssh_keys.html.erb} (91%)
 rename apps/workbench/app/views/users/{_manage_virtual_machines.html.erb => _virtual_machines.html.erb} (92%)
 create mode 100644 apps/workbench/app/views/users/current_token.html.erb
 delete mode 100644 apps/workbench/app/views/users/manage_account.html.erb
 create mode 100644 apps/workbench/app/views/users/repositories.html.erb
 create mode 100644 apps/workbench/app/views/users/ssh_keys.html.erb
 create mode 100644 apps/workbench/app/views/users/virtual_machines.html.erb
 rename apps/workbench/test/integration/{user_manage_account_test.rb => user_settings_menu_test.rb} (65%)
 create mode 100644 backports/python-llfuse/fpm-info.sh
 create mode 100644 backports/python-pycurl/fpm-info.sh
 create mode 100644 doc/_includes/_install_git_curl.liquid
 create mode 100644 doc/_includes/_install_postgres.liquid
 rename doc/{install/install-manual-prerequisites-ruby.html.textile.liquid => _includes/_install_ruby_and_bundler.liquid} (85%)
 create mode 100644 doc/install/copy_pipeline_from_curoverse.html.textile.liquid
 create mode 100755 sdk/cli/test/binstub_clean_fail/mount
 create mode 100755 sdk/cli/test/binstub_docker_noop/docker.io
 create mode 100755 sdk/cli/test/binstub_sanity_check/docker.io
 create mode 100755 sdk/cli/test/binstub_sanity_check/true
 create mode 100644 sdk/cli/test/test_crunch-job.rb
 create mode 100644 sdk/pam/.dockerignore
 copy sdk/{cwl => pam}/.gitignore (100%)
 create mode 100644 sdk/pam/Dockerfile
 create mode 100644 sdk/pam/MANIFEST.in
 create mode 100644 sdk/pam/README.rst
 delete mode 100644 sdk/pam/arvados_pam.py
 create mode 100644 sdk/pam/arvados_pam/__init__.py
 create mode 100644 sdk/pam/arvados_pam/auth_event.py
 delete mode 100644 sdk/pam/debian/arvados_pam
 delete mode 100644 sdk/pam/debian/shellinabox
 create mode 100644 sdk/pam/examples/shellinabox
 create mode 100644 sdk/pam/fpm-info.sh
 copy sdk/{cwl => pam}/gittaggers.py (100%)
 copy {services/fuse/tests/performance => sdk/pam/integration_tests}/__init__.py (100%)
 create mode 100644 sdk/pam/integration_tests/test_pam.py
 create mode 100644 sdk/pam/lib/libpam_arvados.py
 create mode 100644 sdk/pam/pam-configs/arvados
 create mode 100755 sdk/pam/setup.py
 copy {services/fuse/tests/performance => sdk/pam/tests}/__init__.py (100%)
 create mode 100755 sdk/pam/tests/integration_test.pl
 create mode 100644 sdk/pam/tests/mocker.py
 create mode 100644 sdk/pam/tests/test_auth_event.py
 create mode 100644 sdk/pam/tests/test_pam_sm.py
 create mode 100644 sdk/python/fpm-info.sh
 rename services/api/config/{database.yml.sample => database.yml.example} (100%)
 delete mode 100644 services/api/config/initializers/hardcoded_api_tokens.rb.example
 rename docker/api/update-gitolite.rb => services/api/script/arvados-git-sync.rb (96%)
 create mode 100644 services/api/test/fixtures/humans.yml
 create mode 100644 services/datamanager/keep/keep_test.go
 create mode 100644 services/datamanager/summary/trash_list.go
 create mode 100644 services/datamanager/summary/trash_list_test.go
 create mode 100644 services/keepstore/status_test.go
 mode change 100644 => 100755 services/nodemanager/bin/arvados-node-manager

  discards  82780d422b0f8a4ee4e4df52673cc88e7bb936a5 (commit)
  discards  9a02abe887e3c09115a000f9f63666db9fe96172 (commit)
  discards  18277495a1593a0a9a7d67b39f5fb47c62979f2c (commit)
  discards  cddb2c4abda1607ff36f248ddf9a291191114598 (commit)
  discards  6c011d450d6e320892989cc37605f7cf67dc3034 (commit)
  discards  1f9ebffd675488ef594a3ebc81f4c6eb63da7887 (commit)
  discards  5269624aeb5cd29dcb8b488bf8a297fdb6c12e0e (commit)
  discards  f3ae4368efc15797d1d9c6179ca4ba460bdd5da0 (commit)
  discards  d40945c358950d9e43e4ad0aa1ec9fd02353090d (commit)
  discards  bed6a3d54844b6ba41f5f7803f3b289783e02e5f (commit)
  discards  2534cab13807ee2614400365b1cb6a4649c6678e (commit)
  discards  12c47d0c0dc38a9e1d1e5a0e953a226a1a0557c6 (commit)
  discards  847293a0e90fed989b9dea9a99f00126415530b3 (commit)
  discards  ad05e4948fab822910fbf57f60b739f100a5cdb1 (commit)
  discards  d9434ed5ae6f129227a74cc85dc15fa6bdf199ac (commit)
  discards  8b8eb200a3e8d52f1fb98142771412447bb2911e (commit)
  discards  99146504cd618769de59176ee7458c8973877242 (commit)
  discards  ca8e6a099f43d0b227fd1983dabbdaa6cbfb7246 (commit)
  discards  e024bcf310e61819a75e1ef3e45cf99b6457cfb0 (commit)
       via  936b0f1a267b4d274062a3cf8bef4ae8454a5e8f (commit)
       via  c792414b525a64a3407c876f6b8e69570adcb456 (commit)
       via  f8381e235f2d63714f3dcc521298a2f5c3b5c439 (commit)
       via  3cbaa1bece831d2c2f4204cedeaf1751b47a597a (commit)
       via  15cfa6bc3793a54d07fab0f3147cababec4dc5f6 (commit)
       via  99bc88b31986033fbc497c194907a12a97fdce61 (commit)
       via  fa9174d4f575883826465cd29cc2f374d0ed6ed3 (commit)
       via  777fcbd316c112be6e0f4265ca65a9a9f226f27d (commit)
       via  ba094b970272ece90f9ff368e18549660faa05de (commit)
       via  1ee305e5d4b80e478eee71b4b11e8221456eff15 (commit)
       via  cdecbc7f736831dfd6017d975753bec66433e5bf (commit)
       via  dbebb56bc61fb52928edfcfca64cfec9a697362c (commit)
       via  310ccee9f772373256fd836e421f918349d6416f (commit)
       via  5be8366eb525129c9b6d2b710365cf4dbc9392c0 (commit)
       via  e7f7945dfc2d3ec30e0cef4f781cd47c31287efa (commit)
       via  26ffcd38fd6b861a9005959ac016e826d30b8904 (commit)
       via  374ce34bc1f1e8bca18c12f70d7de8dc8c5b60b6 (commit)
       via  59976e5583eda9ff1bf35cb299165829829c5b5d (commit)
       via  73aca60f271fb8217204e7659e45889b4678f33c (commit)
       via  75bbb895da9e69439da97de8e10266930616b708 (commit)
       via  1f0466a3493bd4a04d4164925c0255c28ad0f937 (commit)
       via  3401991df49ba927ac4b1fe47c6e3b7ad1d26883 (commit)
       via  050be075a74dee5247346f1485505366b2fb7a51 (commit)
       via  80213c8143ff102b99aa63ac4de6576269524b28 (commit)
       via  73dcc98470b85c6b2a0aef5c0f7fb9f599e4391a (commit)
       via  209ae8068a9952ecf8ae20311719b25f05a3e4cb (commit)
       via  e9ce5eb4546431cb445b9c9286441755c91d7739 (commit)
       via  a70f39f361c3c60018b90f02af64525534025e09 (commit)
       via  d1131e5f3d4d0d7bcfdfca3451404d6b856a6eb8 (commit)
       via  f96550cc40f340c79339338d1da81394bfcb64ad (commit)
       via  586ea290542efbd35f6e32fd6959d07cab00c969 (commit)
       via  3dc08cb9e6222486dcaadd50e61cad26d3200289 (commit)
       via  7671ee342c26503984616eb6a864b44070b8f6d9 (commit)
       via  98d6a7c2db7b8be7050fe7394147df5c5ce392bb (commit)
       via  c0f9c128aabb366435d751a3ea1a63b76c177f5b (commit)
       via  c249e92657d76221cf3977145a8dfbd79e8f6d9a (commit)
       via  bf23a991e15aa268f98de95e5ebee0564dfb1221 (commit)
       via  4c3860e66b4a4f8108de793ddcfb66b8b5b182aa (commit)
       via  f8daac938f766732ade005aaf8ac239dce1f697f (commit)
       via  d07162cf2fddaafaa4ea0775d396d77f01083501 (commit)
       via  493e39c8a1ab3caf10ae1c03e16d82f4b77e4a6a (commit)
       via  2825758b45ca273e7bcf176b636ff36487d25857 (commit)
       via  094d247c7aaddbcd0a512887c6ce115558d14f72 (commit)
       via  2566020d5e03c6934b9ae6c8b70368da1d03f526 (commit)
       via  5dd20e4a0aac7428a68dabbf5ec2c6800f32957b (commit)
       via  2a77960071b74d33fcca8bf8a8be495320e564e2 (commit)
       via  c52b7c3f28031e6dec9601ffd8583f689149a380 (commit)
       via  aae246886e945f0bba3bfa2a817971e4ad52e050 (commit)
       via  318d446ad02c4511146403897db076acf16e0c9e (commit)
       via  68287acb04a459bb6d9410dd619c5b3b00631a15 (commit)
       via  98feead9acd503a81c69b06bf07d6c1bfd3dd458 (commit)
       via  8e671a545fd8abbf74afa109c0150c1d5772a207 (commit)
       via  bd583d21bb62894a5960b10bf81b375fe6336267 (commit)
       via  1e5f8ef2a8c594eb0de874bef72d6d1485725b5a (commit)
       via  e736def390001e01ff6887acbb3c6f08366f6a91 (commit)
       via  6423d36f00308b5c313715d8ddcc160052010a1c (commit)
       via  fbe23d045022aac8ff3ae691052af25968680944 (commit)
       via  fe4f75a44988826afa194c68b455c685d3b3fa8c (commit)
       via  c2f718800dbb03d336c0370631e9ea81dbb3997f (commit)
       via  9f45d7dd8adfac9a2f690de6a0831498cff5512f (commit)
       via  e14e011f667d314e557c580de69a271534b6149f (commit)
       via  2dfb886e960cf918e54b5f03477f464afb322a9b (commit)
       via  173ebc1102e6a5a5c3a26c1bb231a4b035713369 (commit)
       via  e889ec14bbd18cf82acfabc681d0db967772692d (commit)
       via  d2e546749afaa1ff8fff8cb920b9a54d58154b76 (commit)
       via  522404b2066c8b635a240f79dd4bf652479afadb (commit)
       via  862843135810f7ba4bc6c2138f2ba1bb8840e432 (commit)
       via  f618b6517fff37194a1a4183a607658f46952732 (commit)
       via  2d9cae58ae387d29d161ea96b6cd704d0764b1d8 (commit)
       via  7c89127a0214c77578c09bac9ac24ce9e1c5e104 (commit)
       via  70ddebda3dbe90c8a347c9077397106c6fb949c0 (commit)
       via  a6edcb025f0b659e464fd3e98e59e966ced88afd (commit)
       via  ae2478d6d09d2ab2eac1acceab8342f4f900ec8b (commit)
       via  d89b7ae1f6fa35dd3627ead14c855751f1de2193 (commit)
       via  5562d6d556a942b66ea392c1e9bc803f9b9733e7 (commit)
       via  ab689cf0a5c73e1fa0525416fa12aaf5ba88abc9 (commit)
       via  4de0af809ffbef43d89cd1751e5d611a4b5445e9 (commit)
       via  e0213cbec6a151e077b8cca00700815c3c3d18e7 (commit)
       via  2553fde59a3cf872be891a4f689c241055080c35 (commit)
       via  63b82f437c0f237c76f460ef71be0bb3ed42f7f4 (commit)
       via  c009ae5d372cce86b055aacff479712333baa413 (commit)
       via  132bbb69970640e2906bc1285bee9709accb3fc7 (commit)
       via  507110dc0aa1329ac4e5aad59c347a49e9f77364 (commit)
       via  1702335792308d7c1d578c143a5c99b943f112a9 (commit)
       via  4fc613797f88dbb33c234ba7cd13965b1236bfee (commit)
       via  2a96c097e5a176018d078a5d6985403072e8672e (commit)
       via  970907f28866a09a9fe95da48dffa6cd34ab4dca (commit)
       via  3959d7afff8bb3c3b8da9eb7d178919275180f2a (commit)
       via  bdb850521603561439429c99e414ec702d7f83b1 (commit)
       via  50ff63e948234f6f67acce0aec909f7b6f4705b8 (commit)
       via  06f774bc61654ea0bb6ddd7b9f61bde1dd56b884 (commit)
       via  4357a60b2805af7151e7c24a8e616b36584a22e3 (commit)
       via  c24168b75a7bfb6813843bd0c1825baae7434cc9 (commit)
       via  6cfe8a2abb6121617286c0931ca723cadfc9e98f (commit)
       via  9209660a3bd951c3945bce0da2fa9195cb002e44 (commit)
       via  0988acb472849dc08d576ee40493e70bde2132ca (commit)
       via  f46b05c041684ced4cf438ae6cade577156f81a5 (commit)
       via  fc851b249ea25a40a1fb392906705142113ac5b9 (commit)
       via  d2d7138c84a55ef87937cdaefd9c58a66916d76f (commit)
       via  8fe01fcc1b88a49f1f7eff14d0435e3ac0649721 (commit)
       via  8089b2f5c97b1db9bd826a1b6488f1b060830def (commit)
       via  97f16e9b6ce0095a40b68781238550d066a15261 (commit)
       via  43338e77fc9ac255511395d8a8b1ae4bb8c98577 (commit)
       via  bbf7272aa2b831102c47fc93f8966ec32e918205 (commit)
       via  03d5c4df2f33c5bb2117c45869808fa018e855f1 (commit)
       via  261fe4c689858952b19991e0055eda669ab144af (commit)
       via  427d9052d59ca7819acba9fb2e5f381d3e44a53e (commit)
       via  c8bea0c6dd47a9b9a7a892602d9869177d4c231f (commit)
       via  93e0931ea059355ffc26add1303a52f13d2964c9 (commit)
       via  db717120a687b2851e526117a135a45be75cba1a (commit)
       via  b7c99efa73a3b0ccfec85915f04545d634b235b0 (commit)
       via  5f642789f87c1f0cefbb878cbb031c9b71fc7a09 (commit)
       via  54ad79868ed16e4a53f943ec9dc104c28dee2343 (commit)
       via  092e1b42918850f7166cb4e3bbda25b67049105f (commit)
       via  5916ac79faa2384d75cc41ab4af3ff1a881e8d6e (commit)
       via  29a54ec4795c707b19858e3e02dcc48bf3d77e75 (commit)
       via  467b636f7d1b34f7695f55af972ae90132fc8063 (commit)
       via  2f719adbd0eb4ce07fc2c7012bc1ce8df02e790b (commit)
       via  75eaaaa74a4a7ec6821008fe93dbee598ee24dee (commit)
       via  d6b2ead0824fe6810917e5281b4feb969528eb46 (commit)
       via  15ac44ec46bbbea31bf6a1ccc1842f7703b9f832 (commit)
       via  7e112cd7504e7e379604e2b1fd46b53054a24050 (commit)
       via  5641c4b171f72dd600e17e6db46b3723ab861a4c (commit)
       via  27daf08f38eec505c224e7776678b32d50241e13 (commit)
       via  29d73c8ab33b0d9c34074cd09e581cd7584da31e (commit)
       via  559acee1dca5524f9650e360d5f16fc1582a77f0 (commit)
       via  9036e4876fa3710b12a1dfb465652c04b9a73901 (commit)
       via  e76165d4c3b0cb5929bfec08f36a95ecd80cd564 (commit)
       via  48dd255814cfc90a095132b6f621af13430267e0 (commit)
       via  595d517e5293855690fdc65255648c32970dd31d (commit)
       via  baccfce65b0c997202c22e36a2ebfc455eff0334 (commit)
       via  f097af1ebfdb1ff849c1d5be4bda5b098871b877 (commit)
       via  bdabb9aa520b598107e319e51638f899f136aff5 (commit)
       via  62f471c57d3acd04fe6bb3dc10c195f2cef10024 (commit)
       via  5cff00f1efc7515fd0b8543f618214ecec384eb5 (commit)
       via  44d4d43331979c87cee5df9ff952fd80a6e9c5f8 (commit)
       via  5f3ac75bd946f743ca40ebf454dc29900cddfefb (commit)
       via  e0a1fc70f919741a8ad840dc40cfcc87f2751722 (commit)
       via  e9bee65d40255ab7aced5d380c546068c604bc38 (commit)
       via  68481a07e385c86a03b9c963adc9be091373a78a (commit)
       via  004243ab59e3a2fc36a708b66373297dd83e0b91 (commit)
       via  b369dfc85a356371e1bfb1eb1c3ddb8e8eaffc3d (commit)
       via  6e65fde6143673c27ba85fef16acb1f5dce2d5b9 (commit)
       via  3ba1af7f304edf32aea7250f569e5348f2af501e (commit)
       via  89fccf123374c67e738381ea840e3535b2d1074f (commit)
       via  f2f4727cf8a91e363c2a1c85e98fcdd6419abdb5 (commit)
       via  44184f3bebe52f3d4ea567b8105f3631fcb003fe (commit)
       via  10556d8eea7e57dafe1495f8ceb6fae809d033a3 (commit)
       via  6fad718eea05f4292d89470b62e067754873e85d (commit)
       via  6f883592fd278398f8b5af3d4ff26196469a5050 (commit)
       via  f7e00eb55fbcc41b8cd1f416344ad4abdb020bc0 (commit)
       via  cca0d86de72487988b1a7d657e67143121461a0f (commit)
       via  5b2e3d9c92b34603912872c2e10e13da91268a29 (commit)
       via  e32890977964e1c14169962edeba4f642e4e1091 (commit)
       via  9fcd7e96ad9ed8c7f3a343663a06ee9cbaa15aa9 (commit)
       via  8f93d1332a25d29aa96211920e1b399a1e94482c (commit)
       via  b958c247cba0cc0a9b13dfd4fbc10c1a807b5074 (commit)
       via  01a76d36f6afcd3bb7c8e224881fe98364a47b85 (commit)
       via  30d1623c0fe687acdef334c037c24752c0c61620 (commit)
       via  c94bc0d5c2ae1514a7b0799477279567fa4df177 (commit)
       via  89dcdd013aef473cef6f2b94bfcd2308b60a55d4 (commit)
       via  70a9d8683b62a91e7d3ee3e827af6b69b9aa9a94 (commit)
       via  4c5925a0eceebcd86a4c59a0123dac49aabffae0 (commit)
       via  b9a61af34e1f77500d5d0850b4bfd6e11a6f783a (commit)
       via  821cb42d42976ec6f750e0b0b191ccc36cbad295 (commit)
       via  cec08599bc15bccdf1acfebb4f773f139cc45dcb (commit)
       via  ed2e9fece8c44c5aa464de012b3fce84e7b049eb (commit)
       via  0c96fc858f1f4954d997f2065550087dcc7f8bec (commit)
       via  2e92762444af21749e1ef232aecb6eeb98b312cd (commit)
       via  5f708f621d43585e59787d86d4e3565e09f9c6b8 (commit)
       via  80abb8a0e679f4dd1a7ac00cd629f854af160de5 (commit)
       via  b3a25028eaf04bf77e1f0665968527a9084a84dc (commit)
       via  7e773735e9d547381a77baeeb139696dfa66a7a6 (commit)
       via  15c86f88f1f5124c082e77dedd0b97c5efc93388 (commit)
       via  95135f30f13fe233da5d8713737561f77a97c6b6 (commit)
       via  434be0f7e6420fee1b99e78466ee4a4d734734c1 (commit)
       via  b6f800ae7e474f1ceeb827fc9712296a96514592 (commit)
       via  b5b88cdaefb4f35b877c3279d6556fae7d62afcf (commit)
       via  a812a13a6e9854f37ecebccedeb163b8cbe7130e (commit)
       via  93655055b823716a4e45abc4f35f6ba06920e11b (commit)
       via  d44692dedcf0af87f53653afc27154c38a7b72a5 (commit)
       via  a789f895a5cdbc2a548787f99197f9861521106b (commit)
       via  97fa1e269cf5d33365d76fcdc6ec3f6734b0f468 (commit)
       via  4d6e05c25c6a5d72afee37f8165b006267b4183d (commit)
       via  41bb13b71c8593134c2b34884249ca862b185e04 (commit)
       via  6988f4d44d2f8f7fc4aa2c381334c44d3133cf31 (commit)
       via  bb58564d632498a394ee75b07d17f41b724ef824 (commit)
       via  71e1b1d62a71ad052487f5e8ecb8f36ae17ca8e1 (commit)
       via  f3504dc2b40eaa4235092b671ce0bece43732904 (commit)
       via  5ef15a740ed578b2c5d3cc4ef28ef38bb1c433af (commit)
       via  a76d71558daa6b623c00c8e262840fc6dd68119f (commit)
       via  e8ba9a0432ab7a304bb4b41eb563443f34cf2d7d (commit)
       via  fe7f9a43e42f31c35d879aee3e0ec5f811b19abf (commit)
       via  6870192ada3c34ffe5f0dc4c33ce2ad55bc2d263 (commit)
       via  620fb9e0a294f8910ae82c9c38df69976f911c08 (commit)
       via  43a95731adfcf942fd9ab598c2b8e2c80911de0c (commit)
       via  31a61e326aa23228ab88bf83ff5866e8c7ca9d0a (commit)
       via  1b8caff3ad598744e4a0379b01fc95ca4838caa0 (commit)
       via  61da6612acb822a87fca157fe39757460213473f (commit)
       via  e51a22dc5b9da795b68c87cb9d0a45e4732ed2f6 (commit)
       via  1000275df3e0b4207ae60d51d25b96f74bd06c23 (commit)
       via  465cb9225cce74600349239a295b1360ce2b0fa6 (commit)
       via  d2cbbe5a55cec8e6a874ea65acec7bf36c8c0a83 (commit)
       via  8d2aebfd3a0e4814b292659710386f949cafe092 (commit)
       via  60f3ef78fe5dc269b2c06d980cc95cbc4f8fdd84 (commit)
       via  800e8ba4da76590c8cbe9801bef49a7ffb30bfbc (commit)
       via  70ce2cc8559b74907458797f7fbd8abb6e2c5a23 (commit)
       via  449c9871fb265f111251ee89669914652ac7b4c3 (commit)
       via  6dff0705fd3b4e0acde7bdf5821ef115ba74099b (commit)
       via  39fe0413aa14ff0ba41d790441c8621a877f61f8 (commit)
       via  415ecc439212c2a670b1df05c3e8b1a90245243e (commit)
       via  8e74a5f22b14f26d33c89dadf42600d417094009 (commit)
       via  9b3223e5cbcd1eb89193e8421d430025ddf791d1 (commit)
       via  157a17462d308d94c68bcdaa4dab055c0e5ccbe8 (commit)
       via  52c8fd376a75558657969cdf11955334e95d9e83 (commit)
       via  8797ab137a1d8824292a6f7202f6cd0c9744c4f9 (commit)
       via  8699ba0baf2e346f0adc7ca79f0a8b129f56b9a7 (commit)
       via  747e22c9341d00a3f093298a979175166e2898a7 (commit)
       via  a3d4fc92c1cfc2431adb55255793e0077c0a5bd1 (commit)
       via  eba475a918e4ae847ae6b5753016f2eb54e18085 (commit)
       via  ec752f92eff3990718d169fccc06a04ddc089578 (commit)
       via  db66ff7c2511f80f3795c388f314753064378543 (commit)
       via  cda68ba838b2595c631231a08a8e3b3ea03d2c7f (commit)
       via  017d19d31606b8b313c04fffc33d44592ad9644b (commit)
       via  ccf30f40f46ea450d7ab3766f0923b486a7450d3 (commit)
       via  21f8e36579eb7091041cf696d1e7fbb14caeb6ea (commit)
       via  7d5d40c55d2a38b12e810f3b9d3e168ee434cbd2 (commit)
       via  5058d1cbdbde801a2cf7e303d83e1a626015afdd (commit)
       via  a94ec93d3322320bbf075559c4693da816b815f4 (commit)
       via  06d6b29b121b80cb975e5d9d29a9a6f9b3763c47 (commit)
       via  1e6134579d0fd7f548a9340b67709d4731b29934 (commit)
       via  7019bc98593c0603a2589d01cd028a242bf7f33f (commit)
       via  67813302a9e6d923d3c803b652929f3803cf6ece (commit)
       via  9fd9565ea0411b43943076add49d5bca2da8e379 (commit)
       via  3be96c96a4ff1c519e03e6dc2036e0b93e41ee8c (commit)
       via  2b7d9f3f2e687d524ccc1d88e470e721f63f9e51 (commit)
       via  d22b2efd246ec82a5888d500bcbc7669654b842d (commit)
       via  6152230e1f77b925c2041d688805ab6764919805 (commit)
       via  fc8a283968b08b152d57a24d2c5711c876a3b379 (commit)
       via  ec64034683c31d76d02c293f0ae27d6ae80653cb (commit)
       via  23d0c610d06eaabca9efa0ce36a672973b3e84c9 (commit)
       via  ff24bb94fdfc9dad61821aef56898e2c196c0aea (commit)
       via  ed5230f0e823445f56e7d6940b6fb178e328718b (commit)
       via  0c447acb37a221a096249c6a9cfad4255c08f575 (commit)
       via  e39e11d5850c1a2509f377e2f2551105507f59e3 (commit)
       via  5ac198c8fd7e329b08f344843fa5680add07566c (commit)
       via  004bafda085386b50e074eedf370ec40a386d6e5 (commit)
       via  c21473474aba30c12921961483990908ba525b82 (commit)
       via  56d94b28fc1ee2fe2f36f6743186e8b442c9e676 (commit)
       via  3282d76febc482aab302a44bf594bcf9591ff868 (commit)
       via  da56d26d09a6b315bdb93aed4473310b939e1bd7 (commit)
       via  c8f0866051391c9932a8d39bbd3cacb83e60c20e (commit)
       via  607c3ded2ab3ad0c04ef2e3520ce1b507774dedb (commit)
       via  c0893f609643a73950957c0aa228f167579951d7 (commit)
       via  fd1c8009f521564eaec4dbb22cead032e3784023 (commit)
       via  6d2811a2e001369b950c452be3dee14db8d795c2 (commit)
       via  c86a3350fe212e35a7d745d5607fbc27bc1fd3c8 (commit)
       via  76c38b6bf63fa7329abb135a40710cbdbea18d44 (commit)
       via  0f0108a652d852b0bc165bf43e41fc378c49315a (commit)
       via  aa924bec2e4b5c029cb19f4e9021d72b665be76b (commit)
       via  e69333d856baabf8bbbc27602850dcfe86309d23 (commit)
       via  66af20886def83f6a20cc1e6587de00cbf2f8b59 (commit)
       via  4a5adfa084b3c3a8e586df5ac0acc0b3fc6150db (commit)
       via  a71c0bc8685f71dc9ebb2804626d2d12741eebc3 (commit)
       via  f4180d151eacadf1455b2ebe43ecb61cb095df7c (commit)
       via  171ac077e7335978007daf8199559290e73b8180 (commit)
       via  753c1446ea70d70043be0913e52bb270d28ecded (commit)
       via  3e6ba5fa5f225c8aa431ce9a2796369c1e1dda2d (commit)
       via  aeb481ad2a9b9c3c090b15b317d6ce262ca95da9 (commit)
       via  130cbc5cd46272834c2971b40bdba8c32eeee614 (commit)
       via  15ac0086cc6623fa3e4c601c19a14fdcd6c139ca (commit)
       via  9ce68afa9f3f1b558acd19561760c19e35b8a356 (commit)
       via  2ab08e38376d9591394d35d7676badf96f7b99c9 (commit)
       via  17ce65cd493d8040640f5a5c3d2a97a5175a0465 (commit)
       via  1b9bb00665f837cbafc87bb6fca2252e453abefc (commit)
       via  51641ba5579cb9ebe14234e0888a162b46d1627d (commit)
       via  0ae899078093ac04cfdf416940f4faa821400641 (commit)
       via  90071ec94bdc8bcbbfe6e5b2b2012863f27cd451 (commit)
       via  a467cb24c0a2db71c39ce1bf86507bff6f3cea05 (commit)
       via  ce128902e008420f453eb29986280d72777bec32 (commit)
       via  5b2b5f57eee063c102a5f76b199273ee57eb00fe (commit)
       via  591315b0be613c32666b404cde2b1fe9724baa41 (commit)
       via  51dc4f3f6d1faa0dfa79ae4d282f584fbe797299 (commit)
       via  28bdc3c6fd49e1f3761183a939b163315b413195 (commit)
       via  51a7226a1cf217fe4ea41f6d1b111b55d396485d (commit)
       via  229773c2aa3ddc798ae4eb1eff62f8f77db85391 (commit)
       via  997b13a64e1e224f77c4f1f39f0033d4750413de (commit)
       via  e302c2a74072ebe734adfb45fc6b525f299bb9fb (commit)
       via  5e33262f1aad92582215e29b875411d514846269 (commit)
       via  119d87107846d77274fdaa52dc8cdadd2a7b9765 (commit)
       via  f7f91a7085f8acfbbdd120575e88dcea53297554 (commit)
       via  6aac88a912ded276ed151501ac3977cfcc78600d (commit)
       via  50419ea603e697dd53690842a07a279577d1ca68 (commit)
       via  91b7b7fd54c4728ac6cf12181efc1fed60157ecb (commit)
       via  f51b562b9def7f6bc17e0cc52b60ffc2641d40b5 (commit)
       via  f63e86a3270836ab5e5f1de31e28b686ff809739 (commit)
       via  7da85b85af2dd9b22186bfd48190de7b68f75837 (commit)
       via  036140f305fc34fdefe0ae393b1011f4c3f840de (commit)
       via  b0b276ff6121aace3c52ee855752df6852120343 (commit)
       via  2578d032288f24988a54ee57708fe71902e4ef92 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (82780d422b0f8a4ee4e4df52673cc88e7bb936a5)
            \
             N -- N -- N (936b0f1a267b4d274062a3cf8bef4ae8454a5e8f)

When this happens, we assume that you have already received alert emails
for all of the O revisions, so this email reports only the revisions on
the N branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 936b0f1a267b4d274062a3cf8bef4ae8454a5e8f
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:45:35 2015 -0400

    5824: Fail at startup if ARVADOS_API_HOST is not set.

diff --git a/services/keepdl/main.go b/services/keepdl/main.go
index d780cc3..751543e 100644
--- a/services/keepdl/main.go
+++ b/services/keepdl/main.go
@@ -17,6 +17,9 @@ func init() {
 
 func main() {
 	flag.Parse()
+	if os.Getenv("ARVADOS_API_HOST") == "" {
+		log.Fatal("ARVADOS_API_HOST environment variable must be set.")
+	}
 	srv := &server{}
 	if err := srv.Start(); err != nil {
 		log.Fatal(err)

commit c792414b525a64a3407c876f6b8e69570adcb456
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:28:43 2015 -0400

    5824: Accept anonymous tokens on command line.

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 7a2124a..bc68625 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -17,16 +17,9 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
-var clientPool = arvadosclient.MakeClientPool()
-
-var anonymousTokens []string
-
 type handler struct{}
 
-func init() {
-	// TODO(TC): Get anonymousTokens from flags
-	anonymousTokens = []string{}
-}
+var clientPool = arvadosclient.MakeClientPool()
 
 // return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
 // otherwise return "".
diff --git a/services/keepdl/server.go b/services/keepdl/server.go
index 44da00f..2359f23 100644
--- a/services/keepdl/server.go
+++ b/services/keepdl/server.go
@@ -10,8 +10,8 @@ import (
 var address string
 
 func init() {
-	flag.StringVar(&address, "address", "0.0.0.0:80",
-		"Address to listen on, \"host:port\".")
+	flag.StringVar(&address, "address", ":80",
+		"Address to listen on: \"host:port\", or \":port\" to listen on all interfaces.")
 }
 
 type server struct {

commit f8381e235f2d63714f3dcc521298a2f5c3b5c439
Author: Tom Clegg <tom at curoverse.com>
Date:   Fri Aug 28 01:09:46 2015 -0400

    5824: Handle various combinations of c= and t= more consistently. Use vhosts in integration tests.

diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index 4f61a06..f5c8de8 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -50,13 +50,12 @@
 // "Same-origin mode" below.
 //
 //   http://dl.example.com/c=uuid_or_pdh/path/file.txt
-//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/t=TOKEN/path/file.txt
 //
 // The following "multiple origin" URL patterns are supported for all
 // collections:
 //
 //   http://uuid_or_pdh--dl.example.com/path/file.txt
-//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
 //   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
 //
 // In the "multiple origin" form, the string "--" can be replaced with
@@ -79,17 +78,35 @@
 // collection UUID or a portable data hash with the "+" character
 // replaced by "-".
 //
+// In all of the above forms, a top level directory called "_" is
+// skipped. In cases where the "path/file.txt" part might start with
+// "t=" or "c=" or "_/", links should be constructed with a leading
+// "_/" to ensure the top level directory is not interpreted as a
+// token or collection ID.
+//
 // Assuming there is a collection with UUID
 // zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
 // 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
 // interchangeable:
 //
 //   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
-//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/_/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/_/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--foo.example.com/foo
 //   http://1f4b0bc7583c2a7f9102c395f4ffc5e3+45--.invalid/foo
 //
+// An additional form is supported specifically to make it more
+// convenient to maintain support for existing Workbench download
+// links:
+//
+//   http://dl.example.com/collections/download/uuid_or_pdh/TOKEN/path/file.txt
+//
+// A regular Workbench "download" link is also accepted, but
+// credentials passed via cookie, header, etc. are ignored. Only
+// public data can be served this way:
+//
+//   http://dl.example.com/collections/uuid_or_pdh/path/file.txt
+//
 // Authorization mechanisms
 //
 // A token can be provided in an Authorization header:
@@ -156,7 +173,7 @@
 //
 package main
 
-// TODO(TC): Implement
+// TODO(TC): Implement?
 //
 // Trusted content
 //
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 657c72d..7a2124a 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -50,6 +50,20 @@ func parseCollectionIdFromDNSName(s string) string {
 	return ""
 }
 
+var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
+
+// return a UUID or PDH if s is a UUID or a PDH (even if it is a PDH
+// with "+" replaced by " " or "-"); otherwise return "".
+func parseCollectionIdFromURL(s string) string {
+	if arvadosclient.UUIDMatch(s) {
+		return s
+	}
+	if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var statusCode = 0
 	var statusText string
@@ -89,79 +103,104 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
+	var credentialsOK bool
 
 	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
-		// "http://{id}.domain.example.com/{path}" form
-		if t := r.FormValue("api_token"); t != "" {
-			// ...with explicit token in query string or
-			// form in POST body. We must encrypt the
-			// token such that it can only be used for
-			// this collection; put it in an HttpOnly
-			// cookie; and redirect to the same URL with
-			// the query param redacted, and method =
-			// GET.
-			//
-			// The HttpOnly flag is necessary to prevent
-			// JavaScript code (included in, or loaded by,
-			// a page in the collection being served) from
-			// employing the user's token beyond reading
-			// other files in the same domain, i.e., same
-			// the collection.
-			//
-			// The 303 redirect is necessary in the case
-			// of a GET request to avoid exposing the
-			// token in the Location bar, and in the case
-			// of a POST request to avoid raising warnings
-			// when the user refreshes the resulting page.
-			http.SetCookie(w, &http.Cookie{
-				Name:    "api_token",
-				Value:   auth.EncodeTokenCookie([]byte(t)),
-				Path:    "/",
-				Expires: time.Now().AddDate(10,0,0),
-			})
-			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
-
-			w.Header().Add("Location", redir)
-			statusCode, statusText = http.StatusSeeOther, redir
-			w.WriteHeader(statusCode)
-			io.WriteString(w, `<A href="`)
-			io.WriteString(w, html.EscapeString(redir))
-			io.WriteString(w, `">Continue</A>`)
-			return
-		} else if strings.HasPrefix(pathParts[0], "t=") {
-			// ...with explicit token in path,
-			// "{...}.com/t={token}/{path}".  This form
-			// must only be used to pass scoped tokens
-			// that give permission for a single
-			// collection. See FormValue case above.
-			tokens = []string{pathParts[0][2:]}
-			targetPath = pathParts[1:]
+		// http://ID.dl.example/PATH...
+		credentialsOK = true
+		targetPath = pathParts
+	} else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+		// /c=ID/PATH...
+		targetId = parseCollectionIdFromURL(pathParts[0][2:])
+		targetPath = pathParts[1:]
+	} else if len(pathParts) >= 3 && pathParts[0] == "collections" {
+		if len(pathParts) >= 5 && pathParts[1] == "download" {
+			// /collections/download/ID/TOKEN/PATH...
+			targetId = pathParts[2]
+			tokens = []string{pathParts[3]}
+			targetPath = pathParts[4:]
 			pathToken = true
 		} else {
-			// ...with cookie, Authorization header, or
-			// no token at all
-			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
-			tokens = append(reqTokens, anonymousTokens...)
-			targetPath = pathParts
+			// /collections/ID/PATH...
+			targetId = pathParts[1]
+			tokens = anonymousTokens
+			targetPath = pathParts[2:]
 		}
-	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+	} else {
 		statusCode = http.StatusNotFound
 		return
-	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
-		// "/collections/download/{id}/{token}/path..." form:
-		// Don't use our configured anonymous tokens,
-		// Authorization headers, etc.  Just use the token in
-		// the path.
-		targetId = pathParts[2]
-		tokens = []string{pathParts[3]}
-		targetPath = pathParts[4:]
+	}
+	if t := r.FormValue("api_token"); t != "" {
+		// The client provided an explicit token in the query
+		// string, or a form in POST body. We must put the
+		// token in an HttpOnly cookie, and redirect to the
+		// same URL with the query param redacted and method =
+		// GET.
+
+		if !credentialsOK {
+			// It is not safe to copy the provided token
+			// into a cookie unless the current vhost
+			// (origin) serves only a single collection.
+			statusCode = http.StatusBadRequest
+			return
+		}
+
+		// The HttpOnly flag is necessary to prevent
+		// JavaScript code (included in, or loaded by, a page
+		// in the collection being served) from employing the
+		// user's token beyond reading other files in the same
+		// domain, i.e., same collection.
+		//
+		// The 303 redirect is necessary in the case of a GET
+		// request to avoid exposing the token in the Location
+		// bar, and in the case of a POST request to avoid
+		// raising warnings when the user refreshes the
+		// resulting page.
+
+		http.SetCookie(w, &http.Cookie{
+			Name:     "api_token",
+			Value:    auth.EncodeTokenCookie([]byte(t)),
+			Path:     "/",
+			Expires:  time.Now().AddDate(10,0,0),
+			HttpOnly: true,
+		})
+		redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+		w.Header().Add("Location", redir)
+		statusCode, statusText = http.StatusSeeOther, redir
+		w.WriteHeader(statusCode)
+		io.WriteString(w, `<A href="`)
+		io.WriteString(w, html.EscapeString(redir))
+		io.WriteString(w, `">Continue</A>`)
+		return
+	}
+
+	if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+		// http://ID.example/t=TOKEN/PATH...
+		// /c=ID/t=TOKEN/PATH...
+		//
+		// This form must only be used to pass scoped tokens
+		// that give permission for a single collection. See
+		// FormValue case above.
+		tokens = []string{targetPath[0][2:]}
 		pathToken = true
-	} else {
-		// "/collections/{id}/path..." form
-		targetId = pathParts[1]
-		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		targetPath = targetPath[1:]
+	}
+
+	if tokens == nil {
+		if credentialsOK {
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		}
 		tokens = append(reqTokens, anonymousTokens...)
-		targetPath = pathParts[2:]
+	}
+
+	if len(targetPath) > 0 && targetPath[0] == "_" {
+		// If a collection has a directory called "t=foo" or
+		// "_", it can be served at //dl.example/_/t=foo/ or
+		// //dl.example/_/_/ respectively: //dl.example/t=foo/
+		// won't work because t=foo will be interpreted as a
+		// token "foo".
+		targetPath = targetPath[1:]
 	}
 
 	tokenResult := make(map[string]int)
@@ -188,11 +227,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		return
 	}
 	if !found {
-		if pathToken {
-			// The URL is a "secret sharing link", but it
-			// didn't work out. Asking the client for
-			// additional credentials would just be
-			// confusing.
+		if pathToken || !credentialsOK {
+			// Either the URL is a "secret sharing link"
+			// that didn't work out (and asking the client
+			// for additional credentials would just be
+			// confusing), or we don't even accept
+			// credentials at this path.
 			statusCode = http.StatusNotFound
 			return
 		}
diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
index 8977ebc..69dff4f 100644
--- a/services/keepdl/handler_test.go
+++ b/services/keepdl/handler_test.go
@@ -99,8 +99,10 @@ func authzViaPOST(r *http.Request, tok string) int {
 func doVhostRequests(c *check.C, authz authorizer) {
 	for _, hostPath := range []string{
 		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/foo",
+		arvadostest.FooCollection + "--dl.example.com/_/foo",
 		arvadostest.FooPdh + ".example.com/foo",
-		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + "--dl.example.com/foo",
 	} {
 		c.Log("doRequests: ", hostPath)
 		doVhostRequestsWithHostPath(c, authz, hostPath)
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 6fef9b8..964fa3a 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -29,7 +29,7 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"bogustoken",
 	} {
 		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
-		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
@@ -119,6 +119,14 @@ func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
 	c.Check(size, check.Equals, int64(blocksize)*100)
 }
 
+type curlCase struct {
+	id      string
+	auth    string
+	host    string
+	path    string
+	dataMD5 string
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -128,28 +136,101 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
-	for _, spec := range [][]string{
+	for _, spec := range []curlCase{
 		// My collection
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement.
-		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{
+			auth: arvadostest.ActiveToken,
+			host: arvadostest.FooCollection + "--dl.example.com",
+			path: "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			host: strings.Replace(arvadostest.FooPdh, "+", "-", 1) + ".dl.example.com",
+			path: "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + arvadostest.FooPdh + "/t=" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/c=" + strings.Replace(arvadostest.FooPdh, "+", "-", 1) + "/t=" + arvadostest.ActiveToken + "/_/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: "tokensobogus",
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+		{
+			auth: arvadostest.AnonymousToken,
+			path: "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo",
+			dataMD5: "acbd18db4cc2f85cedef654fccc4a4d8",
+		},
+
+		// Anonymously accessible user agreement
+		{
+			path: "/c=" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			host: arvadostest.HelloWorldCollection + ".dl.example.com",
+			path: "/_/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.ActiveToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			host: arvadostest.HelloWorldCollection + "--dl.example.com",
+			path: "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
+		{
+			auth: arvadostest.SpectatorToken,
+			path: "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt",
+			dataMD5: "f0ef7081e1539ac00ef5b761b4fb01b3",
+		},
 	} {
-		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
+		host := spec.host
+		if host == "" {
+			host = "dl.example.com"
+		}
+		hdr, body, _ := s.runCurl(c, spec.auth, host, spec.path)
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
-		if strings.HasSuffix(spec[1], ".txt") {
+		if strings.HasSuffix(spec.path, ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
 			// TODO: Check some types that aren't
 			// automatically detected by Go's http server
 			// by sniffing the content.
 		}
-		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.dataMD5)
 	}
 }
 

commit 3cbaa1bece831d2c2f4204cedeaf1751b47a597a
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:34:21 2015 -0400

    5824: Comment to explain "authorizer" test helpers.

diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
index 0494376..8977ebc 100644
--- a/services/keepdl/handler_test.go
+++ b/services/keepdl/handler_test.go
@@ -42,6 +42,10 @@ func (s *IntegrationSuite) TestVhost404(c *check.C) {
 	}
 }
 
+// An authorizer modifies an HTTP request to make use of the given
+// token -- by adding it to a header, cookie, query param, or whatever
+// -- and returns the HTTP status code we should expect from keepdl if
+// the token is invalid.
 type authorizer func(*http.Request, string) int
 
 func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {

commit 15cfa6bc3793a54d07fab0f3147cababec4dc5f6
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:33:44 2015 -0400

    5824: Fix up support for PDH in vhostname.

diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index d877356..4f61a06 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -72,7 +72,8 @@
 // upstream proxy.
 //
 // In all of the above forms, the "dl.example.com" part can be
-// anything at all.
+// anything at all: keepdl ignores everything after the first "." or
+// "--".
 //
 // In all of the above forms, the "uuid_or_pdh" part can be either a
 // collection UUID or a portable data hash with the "+" character
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 30b4b64..657c72d 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -28,7 +28,8 @@ func init() {
 	anonymousTokens = []string{}
 }
 
-// return s if s is a UUID or a PDH, otherwise ""
+// return a UUID or PDH if s begins with a UUID or URL-encoded PDH;
+// otherwise return "".
 func parseCollectionIdFromDNSName(s string) string {
 	// Strip domain.
 	if i := strings.IndexRune(s, '.'); i >= 0 {
@@ -40,10 +41,13 @@ func parseCollectionIdFromDNSName(s string) string {
 	if i := strings.Index(s, "--"); i >= 0 {
 		s = s[:i]
 	}
-	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
-		return ""
+	if arvadosclient.UUIDMatch(s) {
+		return s
 	}
-	return s
+	if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
+		return pdh
+	}
+	return ""
 }
 
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
index a1f5e1a..0494376 100644
--- a/services/keepdl/handler_test.go
+++ b/services/keepdl/handler_test.go
@@ -93,7 +93,17 @@ func authzViaPOST(r *http.Request, tok string) int {
 // Try some combinations of {url, token} using the given authorization
 // mechanism, and verify the result is correct.
 func doVhostRequests(c *check.C, authz authorizer) {
-	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, hostPath := range []string{
+		arvadostest.FooCollection + ".example.com/foo",
+		arvadostest.FooPdh + ".example.com/foo",
+		strings.Replace(arvadostest.FooPdh, "+", "-", -1) + ".example.com/foo",
+	} {
+		c.Log("doRequests: ", hostPath)
+		doVhostRequestsWithHostPath(c, authz, hostPath)
+	}
+}
+
+func doVhostRequestsWithHostPath(c *check.C, authz authorizer, hostPath string) {
 	for _, tok := range []string{
 		arvadostest.ActiveToken,
 		arvadostest.ActiveToken[:15],

commit 99bc88b31986033fbc497c194907a12a97fdce61
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:31:19 2015 -0400

    5824: Modernize install page, cf. other services.

diff --git a/doc/install/install-keepdl.html.textile.liquid b/doc/install/install-keepdl.html.textile.liquid
index 6730dff..e0bd50e 100644
--- a/doc/install/install-keepdl.html.textile.liquid
+++ b/doc/install/install-keepdl.html.textile.liquid
@@ -4,8 +4,6 @@ navsection: installguide
 title: Install download server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
 The keepdl server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
 
 By convention, we use the following hostname for the download service:
@@ -19,13 +17,17 @@ This hostname should resolve from anywhere on the internet.
 
 h2. Install keepdl
 
-First add the Arvados apt repository, and then install the keepdl package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keepdl</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keepdl</span>
+<pre><code>~$ <span class="userinput">sudo yum install keepdl</span>
 </code></pre>
 </notextile>
 
@@ -38,9 +40,7 @@ Usage of keepdl:
 </code></pre>
 </notextile>
 
-We recommend running @arv-git-httpd@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
-
-Your @run@ script should look something like this:
+We recommend running @keepdl@ under "runit":https://packages.debian.org/search?keywords=runit or a similar supervisor. The basic command to start @keepdl@ is:
 
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
@@ -54,11 +54,45 @@ The keepdl service will be accessible from anywhere on the internet, so we recom
 
 This is best achieved by putting a reverse proxy with SSL support in front of keepdl, running on port 443 and passing requests to keepdl on port 9002 (or whatever port you chose in your run script).
 
+Note: A wildcard SSL certificate is required in order to proxy keepdl effectively.
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream keepdl {
+  server                127.0.0.1:<span class="userinput">9002</span>;
+}
+
+server {
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           dl.<span class="userinput">uuid_prefix</span>.your.domain *.dl.<span class="userinput">uuid_prefix</span>.your.domain ~.*--dl.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+
+  ssl                   on;
+  ssl_certificate       <span class="userinput"/>YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key   <span class="userinput"/>YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass          http://keepdl;
+    proxy_set_header    X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</pre></notextile>
+
 h3. Tell the API server about the keepdl service
 
-In your API server's config/application.yml file, add the following entry:
+If your wildcard certificate is valid for <code>*.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
+
+<notextile>
+<pre><code>keepdl: https://%{uuid_or_pdh}--dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+If your wildcard certificate is valid for <code>*.dl.<span class="userinput">uuid_prefix</span>.your.domain</code>, add the following entry to your API server's @config/application.yml@ file:
 
 <notextile>
-<pre><code>keepdl: dl.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>keepdl: https://%{uuid_or_pdh}.dl.<span class="userinput">uuid_prefix</span>.your.domain
 </code></pre>
 </notextile>

commit fa9174d4f575883826465cd29cc2f374d0ed6ed3
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:17:54 2015 -0400

    5824: Clarify difference between keepproxy and keepstore (bandwidth and convenience -- not security).

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index 36c49db..2b2c1e3 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -4,9 +4,9 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
+The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is suitable for lower-bandwidth clients located elsewhere on the internet: a client sends a single copy of a data block, and Keepproxy sends copies to the appropriate Keepstore servers. Keepproxy also accepts requests from clients that do not compute data hashes before uploading data: notably, the browser-based upload feature in Workbench requires Keepproxy.
 
-By convention, we use the following hostname for the Keepproxy:
+By convention, we use the following hostname for the Keepproxy server:
 
 <div class="offset1">
 table(table table-bordered table-condensed).

commit 777fcbd316c112be6e0f4265ca65a9a9f226f27d
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Aug 27 22:16:26 2015 -0400

    5824: Update keepproxy usage.

diff --git a/doc/install/install-keepproxy.html.textile.liquid b/doc/install/install-keepproxy.html.textile.liquid
index f46e650..36c49db 100644
--- a/doc/install/install-keepproxy.html.textile.liquid
+++ b/doc/install/install-keepproxy.html.textile.liquid
@@ -36,12 +36,13 @@ Verify that Keepproxy is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepproxy -h</span>
-Usage of default:
+Usage of keepproxy:
   -default-replicas=2: Default number of replicas to write if not specified by the client.
   -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
   -no-get=false: If set, disable GET operations
   -no-put=false: If set, disable PUT operations
   -pid="": Path to write pid file
+  -timeout=15: Timeout on requests to internal Keep services (default 15 seconds)
 </code></pre>
 </notextile>
 
diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index d0af4a5..313a285 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -37,7 +37,7 @@ func main() {
 		pidfile          string
 	)
 
-	flagset := flag.NewFlagSet("default", flag.ExitOnError)
+	flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
 
 	flagset.StringVar(
 		&listen,

commit ba094b970272ece90f9ff368e18549660faa05de
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Aug 16 00:16:27 2015 -0400

    5824: Fix up error checking and early-close behavior in CollectionFileReader.

diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
index 929f693..eac23d0 100644
--- a/sdk/go/keepclient/collectionreader.go
+++ b/sdk/go/keepclient/collectionreader.go
@@ -8,6 +8,17 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
+const (
+	// After reading a data block from Keep, cfReader slices it up
+	// and sends the slices to a buffered channel to be consumed
+	// by the caller via Read().
+	//
+	// dataSliceSize is the maximum size of the slices, and
+	// therefore the maximum number of bytes that will be returned
+	// by a single call to Read().
+	dataSliceSize = 1 << 20
+)
+
 var (
 	ErrNoManifest     = errors.New("Collection has no manifest")
 	ErrNotImplemented = errors.New("Not implemented")
@@ -40,8 +51,10 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
 			}
 			q = append(q, seg)
 			r.totalSize += uint64(seg.Len)
-			// Send toGet whatever it's ready to receive.
-			Q: for len(q) > 0 {
+			// Send toGet as many segments as we can until
+			// it blocks.
+		Q:
+			for len(q) > 0 {
 				select {
 				case r.toGet <- q[0]:
 					q = q[1:]
@@ -75,84 +88,127 @@ type cfReader struct {
 	// doGet() reads FileSegments from toGet, gets the data from
 	// Keep, and sends byte slices to toRead to be consumed by
 	// Read().
-	toGet        chan *manifest.FileSegment
-	toRead       chan []byte
+	toGet chan *manifest.FileSegment
+	// toRead is a buffered channel, sized to fit one full Keep
+	// block. This lets us verify checksums without having a
+	// store-and-forward delay between blocks: by the time the
+	// caller starts receiving data from block N, cfReader is
+	// starting to fetch block N+1. A larger buffer would be
+	// useful for a caller whose read speed varies a lot.
+	toRead chan []byte
 	// bytes ready to send next time someone calls Read()
-	buf          []byte
+	buf []byte
 	// Total size of the file being read. Not safe to read this
 	// until countDone is closed.
-	totalSize    uint64
-	countDone    chan struct{}
+	totalSize uint64
+	countDone chan struct{}
 	// First error encountered.
-	err          error
+	err error
+	// errNotNil is closed IFF err contains a non-nil error.
+	// Receiving from it will block until an error occurs.
+	errNotNil chan struct{}
+	// rdrClosed is closed IFF the reader's Close() method has
+	// been called. Any goroutines associated with the reader will
+	// stop and free up resources when they notice this channel is
+	// closed.
+	rdrClosed chan struct{}
 }
 
-func (r *cfReader) Read(outbuf []byte) (n int, err error) {
-	if r.err != nil {
-		return 0, r.err
+func (r *cfReader) Read(outbuf []byte) (int, error) {
+	if r.Error() != nil {
+		return 0, r.Error()
 	}
 	for r.buf == nil || len(r.buf) == 0 {
 		var ok bool
 		r.buf, ok = <-r.toRead
-		if r.err != nil {
-			return 0, r.err
+		if r.Error() != nil {
+			return 0, r.Error()
 		} else if !ok {
 			return 0, io.EOF
 		}
 	}
+	n := len(r.buf)
 	if len(r.buf) > len(outbuf) {
 		n = len(outbuf)
-	} else {
-		n = len(r.buf)
 	}
 	copy(outbuf[:n], r.buf[:n])
 	r.buf = r.buf[n:]
-	return
+	return n, nil
 }
 
 func (r *cfReader) Close() error {
-	_, _ = <-r.countDone
-	for _ = range r.toGet {
-	}
-	for _ = range r.toRead {
+	close(r.rdrClosed)
+	return r.Error()
+}
+
+func (r *cfReader) Error() error {
+	select {
+	case <-r.errNotNil:
+		return r.err
+	default:
+		return nil
 	}
-	return r.err
 }
 
 func (r *cfReader) Len() uint64 {
 	// Wait for all segments to be counted
-	_, _ = <-r.countDone
+	<-r.countDone
 	return r.totalSize
 }
 
 func (r *cfReader) doGet() {
 	defer close(r.toRead)
+GET:
 	for fs := range r.toGet {
 		rdr, _, _, err := r.keepClient.Get(fs.Locator)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
 		var buf = make([]byte, fs.Offset+fs.Len)
 		_, err = io.ReadFull(rdr, buf)
 		if err != nil {
 			r.err = err
+			close(r.errNotNil)
 			return
 		}
-		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+		for bOff, bLen := fs.Offset, dataSliceSize; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
 			if bOff+bLen > fs.Offset+fs.Len {
 				bLen = fs.Offset + fs.Len - bOff
 			}
-			r.toRead <- buf[bOff : bOff+bLen]
+			select {
+			case r.toRead <- buf[bOff : bOff+bLen]:
+			case <-r.rdrClosed:
+				// Reader is closed: no point sending
+				// anything more to toRead.
+				break GET
+			}
+		}
+		// It is possible that r.rdrClosed is closed but we
+		// never noticed because r.toRead was also ready in
+		// every select{} above. Here we check before wasting
+		// a keepclient.Get() call.
+		select {
+		case <-r.rdrClosed:
+			break GET
+		default:
 		}
 	}
+	// In case we exited the above loop early: before returning,
+	// drain the toGet channel so its sender doesn't sit around
+	// blocking forever.
+	for _ = range r.toGet {
+	}
 }
 
 func newCFReader(kc *KeepClient) (r *cfReader) {
 	r = new(cfReader)
 	r.keepClient = kc
+	r.rdrClosed = make(chan struct{})
+	r.errNotNil = make(chan struct{})
 	r.toGet = make(chan *manifest.FileSegment, 2)
-	r.toRead = make(chan []byte)
+	r.toRead = make(chan []byte, (BLOCKSIZE+dataSliceSize-1)/dataSliceSize)
 	r.countDone = make(chan struct{})
 	go r.doGet()
 	return
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index f271208..51710b7 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -20,7 +20,8 @@ type IntegrationSuite struct{}
 
 type SuccessHandler struct {
 	disk map[string][]byte
-	lock chan struct{}
+	lock chan struct{}	// channel with buffer==1: full when an operation is in progress.
+	ops  *int		// number of operations completed
 }
 
 func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
@@ -34,12 +35,18 @@ func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
 		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
 		h.lock <- struct{}{}
 		h.disk[pdh] = buf
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		resp.Write([]byte(pdh))
 	case "GET":
 		pdh := req.URL.Path[1:]
 		h.lock <- struct{}{}
 		buf, ok := h.disk[pdh]
+		if h.ops != nil {
+			(*h.ops)++
+		}
 		<- h.lock
 		if !ok {
 			resp.WriteHeader(http.StatusNotFound)
@@ -57,6 +64,14 @@ type rdrTest struct {
 	want interface{} // error or string to expect
 }
 
+func StubWithFakeServers(kc *KeepClient, h http.Handler) {
+	localRoots := make(map[string]string)
+	for i, k := range RunSomeFakeKeepServers(h, 4) {
+		localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+	}
+	kc.SetServiceRoots(localRoots, localRoots, nil)
+}
+
 func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.IsNil)
@@ -66,12 +81,11 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 	c.Assert(err, check.IsNil)
 
 	{
-		localRoots := make(map[string]string)
-		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
-		for i, k := range RunSomeFakeKeepServers(h, 4) {
-			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		h := SuccessHandler{
+			disk: make(map[string][]byte),
+			lock: make(chan struct{}, 1),
 		}
-		kc.SetServiceRoots(localRoots, localRoots, nil)
+		StubWithFakeServers(kc, h)
 		kc.PutB([]byte("foo"))
 		kc.PutB([]byte("bar"))
 		kc.PutB([]byte("Hello world\n"))
@@ -121,3 +135,49 @@ func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
 		}
 	}
 }
+
+func (s *ServerRequiredSuite) TestCollectionReaderCloseEarly(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	h := SuccessHandler{
+		disk: make(map[string][]byte),
+		lock: make(chan struct{}, 1),
+		ops: new(int),
+	}
+	StubWithFakeServers(kc, h)
+	kc.PutB([]byte("foo"))
+
+	mt := ". "
+	for i := 0; i < 1000; i++ {
+		mt += "acbd18db4cc2f85cedef654fccc4a4d8+3 "
+	}
+	mt += "0:3000:foo1000.txt\n"
+
+	// Grab the stub server's lock, ensuring our cfReader doesn't
+	// get anything back from its first call to kc.Get() before we
+	// have a chance to call Close().
+	h.lock <- struct{}{}
+	opsBeforeRead := *h.ops
+
+	rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": mt}, "foo1000.txt")
+	c.Assert(err, check.IsNil)
+	err = rdr.Close()
+	c.Assert(err, check.IsNil)
+	c.Assert(rdr.Error(), check.IsNil)
+
+	// Release the stub server's lock. The first GET operation will proceed.
+	<-h.lock
+
+	// doGet() should close toRead before sending any more bufs to it.
+	if what, ok := <-rdr.toRead;  ok {
+		c.Errorf("Got %+v, expected toRead to be closed", what)
+	}
+
+	// Stub should have handled exactly one GET request.
+	c.Assert(*h.ops, check.Equals, opsBeforeRead+1)
+}

commit 1ee305e5d4b80e478eee71b4b11e8221456eff15
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 04:41:26 2015 -0400

    5824: Enable blob signing in integration tests, and send keepstore logs to files.
    
    For some reason, sending keepstore logs to stdout seems to make
    keepstore crash silently when invoked from a Go integration test. Work
    around this by logging to disk, like we do with API server.

diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index c1f6a3e..e4e459e 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -743,7 +743,7 @@ func (s *ServerRequiredSuite) TestPutGetHead(c *C) {
 	}
 	{
 		hash2, replicas, err := kc.PutB(content)
-		c.Check(hash2, Equals, fmt.Sprintf("%s+%d", hash, len(content)))
+		c.Check(hash2, Matches, fmt.Sprintf(`%s\+%d\b.*`, hash, len(content)))
 		c.Check(replicas, Equals, 2)
 		c.Check(err, Equals, nil)
 	}
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index 1c5162b..2b9d5f7 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -310,8 +310,9 @@ def _start_keep(n, keep_args):
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
+    logf = open(os.path.join(TEST_TMPDIR, 'keep{}.log'.format(n)), 'a+')
     kp0 = subprocess.Popen(
-        keep_cmd, stdin=open('/dev/null'), stdout=sys.stderr)
+        keep_cmd, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
     with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
@@ -326,10 +327,11 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
     stop_keep()
 
     keep_args = {}
-    if blob_signing_key:
-        with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
-            keep_args['--permission-key-file'] = f.name
-            f.write(blob_signing_key)
+    if not blob_signing_key:
+        blob_signing_key = 'zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc'
+    with open(os.path.join(TEST_TMPDIR, "keep.blob_signing_key"), "w") as f:
+        keep_args['--blob-signing-key-file'] = f.name
+        f.write(blob_signing_key)
     if enforce_permissions:
         keep_args['--enforce-permissions'] = 'true'
 
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py
index 13fc88d..ac7dd1b 100644
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -1144,7 +1144,7 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         c2.save()
 
         c1.update()
-        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3 7ac66c0f148de9519b8bd264312c4d64\+7\+A[a-f0-9]{40}@[a-f0-9]{8} 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
+        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3\S* 7ac66c0f148de9519b8bd264312c4d64\+7\S* 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
 
 if __name__ == '__main__':

commit cdecbc7f736831dfd6017d975753bec66433e5bf
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 03:33:19 2015 -0400

    5824: Add Content-Length header.

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 03b3e26..30b4b64 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -239,6 +239,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 			w.Header().Set("Content-Type", t)
 		}
 	}
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", rdr.Len()))
 
 	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)

commit dbebb56bc61fb52928edfcfca64cfec9a697362c
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jul 29 01:56:55 2015 -0400

    5824: Use vhosts in curl integration tests. Add large file test.

diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 5864315..6fef9b8 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -3,6 +3,9 @@ package main
 import (
 	"crypto/md5"
 	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
 	"os/exec"
 	"strings"
 	"testing"
@@ -25,17 +28,17 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
+		hdr, body, _ := s.runCurl(c, token, "dl.example.com", "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
+			hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
 
-		hdr, body = s.runCurl(c, token, "/bad-route")
+		hdr, body, _ = s.runCurl(c, token, "dl.example.com", "/bad-route")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 		c.Check(body, check.Equals, "")
 	}
@@ -64,12 +67,58 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
 		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
+		hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "dl.example.com", uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
+func (s *IntegrationSuite) Test1GBFile(c *check.C) {
+	if testing.Short() {
+		c.Skip("skipping 1GB integration test in short mode")
+	}
+	s.test100BlockFile(c, 10000000)
+}
+
+func (s *IntegrationSuite) Test300MBFile(c *check.C) {
+	s.test100BlockFile(c, 3000000)
+}
+
+func (s *IntegrationSuite) test100BlockFile(c *check.C, blocksize int) {
+	testdata := make([]byte, blocksize)
+	for i := 0; i < blocksize; i++ {
+		testdata[i] = byte(' ')
+	}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = arvadostest.ActiveToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	loc, _, err := kc.PutB(testdata[:])
+	c.Assert(err, check.Equals, nil)
+	mtext := "."
+	for i := 0; i < 100; i++ {
+		mtext = mtext + " " + loc
+	}
+	mtext = mtext + fmt.Sprintf(" 0:%d00:testdata.bin\n", blocksize)
+	coll := map[string]interface{}{}
+	err = arv.Create("collections",
+		map[string]interface{}{
+			"collection": map[string]interface{}{
+				"name": fmt.Sprintf("testdata blocksize=%d", blocksize),
+				"manifest_text": mtext,
+			},
+		}, &coll)
+	c.Assert(err, check.Equals, nil)
+	uuid := coll["uuid"].(string)
+	
+	hdr, body, size := s.runCurl(c, arv.ApiToken, uuid + ".dl.example.com", "/testdata.bin")
+	c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+	c.Check(hdr, check.Matches, `(?si).*Content-length: `+fmt.Sprintf("%d00", blocksize)+`\r\n.*`)
+	c.Check([]byte(body)[:1234], check.DeepEquals, testdata[:1234])
+	c.Check(size, check.Equals, int64(blocksize)*100)
+}
+
 func (s *IntegrationSuite) Test200(c *check.C) {
 	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
@@ -86,19 +135,13 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		// Anonymously accessible user agreement. These should
-		// start working when CollectionFileReader provides
-		// real data instead of fake/stub data.
+		// Anonymously accessible user agreement.
 		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
-		hdr, body := s.runCurl(c, spec[0], spec[1])
-		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
-			c.Log("Not implemented!")
-			continue
-		}
+		hdr, body, _ := s.runCurl(c, spec[0], "dl.example.com", spec[1])
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
 		if strings.HasSuffix(spec[1], ".txt") {
 			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
@@ -111,15 +154,34 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
 	curlArgs := []string{"--silent", "--show-error", "--include"}
+	testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
+	curlArgs = append(curlArgs, "--resolve", host + ":" + testPort + ":" + testHost)
 	if token != "" {
 		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
 	}
 	curlArgs = append(curlArgs, args...)
-	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)
 	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
-	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	cmd := exec.Command("curl", curlArgs...)
+	stdout, err := cmd.StdoutPipe()
+	c.Assert(err, check.Equals, nil)
+	cmd.Stderr = cmd.Stdout
+	go cmd.Start()
+	buf := make([]byte, 2<<27)
+	n, err := io.ReadFull(stdout, buf)
+	// Discard (but measure size of) anything past 256 MiB.
+	var discarded int64
+	if err == io.ErrUnexpectedEOF {
+		err = nil
+		buf = buf[:n]
+	} else {
+		c.Assert(err, check.Equals, nil)
+		discarded, err = io.Copy(ioutil.Discard, stdout)
+		c.Assert(err, check.Equals, nil)
+	}
+	err = cmd.Wait()
 	// Without "-f", curl exits 0 as long as it gets a valid HTTP
 	// response from the server, even if the response status
 	// indicates that the request failed. In our test suite, we
@@ -127,10 +189,11 @@ func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string
 	// headers ourselves. If curl exits non-zero, our testing
 	// environment is broken.
 	c.Assert(err, check.Equals, nil)
-	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	hdrsAndBody := strings.SplitN(string(buf), "\r\n\r\n", 2)
 	c.Assert(len(hdrsAndBody), check.Equals, 2)
 	hdr = hdrsAndBody[0]
-	body = hdrsAndBody[1]
+	bodyPart = hdrsAndBody[1]
+	bodySize = int64(len(bodyPart)) + discarded
 	return
 }
 

commit 310ccee9f772373256fd836e421f918349d6416f
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:20:28 2015 -0400

    5824: Support vhost-based collection lookups.

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index 87b28f8..3040e0a 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -7,6 +7,8 @@ const (
 	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
 	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
 	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	FooPdh                = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
+	HelloWorldPdh         = "55713e6a34081eb03609e7ad5fcad129+62"
 	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K@xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero@0 0:1:f 1:0:zero@1 1:4:ooba 4:0:zero@4 5:1:r 5:4:rbaz 9:0:zero@9\n" +
 		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
 		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
diff --git a/sdk/go/auth/auth.go b/sdk/go/auth/auth.go
index 4a719e9..3c7888a 100644
--- a/sdk/go/auth/auth.go
+++ b/sdk/go/auth/auth.go
@@ -1,13 +1,14 @@
 package auth
 
 import (
+	"encoding/base64"
 	"net/http"
 	"net/url"
 	"strings"
 )
 
 type Credentials struct {
-	Tokens []string
+	Tokens     []string
 }
 
 func NewCredentials() *Credentials {
@@ -20,6 +21,15 @@ func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
 	return c
 }
 
+// EncodeTokenCookie accepts a token and returns a string suitable
+// for use as a cookie value, such that it will be decoded correctly
+// by LoadTokensFromHTTPRequest.
+var EncodeTokenCookie func([]byte) string = base64.URLEncoding.EncodeToString
+
+// DecodeTokenCookie accepts a cookie value and returns the decoded
+// token.
+var DecodeTokenCookie func(string) ([]byte, error) = base64.URLEncoding.DecodeString
+
 // LoadTokensFromHttpRequest loads all tokens it can find in the
 // headers and query string of an http query.
 func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
@@ -51,6 +61,8 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 		a.Tokens = append(a.Tokens, val...)
 	}
 
+	a.loadTokenFromCookie(r)
+
 	// TODO: Load token from Rails session cookie (if Rails site
 	// secret is known)
 }
@@ -59,3 +71,15 @@ func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
 // LoadTokensFromHttpRequest() that [or how] we should read and parse
 // the request body. This has to be requested explicitly by the
 // application.
+
+func (a *Credentials) loadTokenFromCookie(r *http.Request) {
+	cookie, err := r.Cookie("api_token")
+	if err != nil || len(cookie.Value) == 0 {
+		return
+	}
+	token, err := DecodeTokenCookie(cookie.Value)
+	if err != nil {
+		return
+	}
+	a.Tokens = append(a.Tokens, string(token))
+}
diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
index 65c7f19..d877356 100644
--- a/services/keepdl/doc.go
+++ b/services/keepdl/doc.go
@@ -1,28 +1,158 @@
 // Keepdl provides read-only HTTP access to files stored in Keep. It
 // serves public data to anonymous and unauthenticated clients, and
-// accepts authentication via Arvados tokens. It can be installed
-// anywhere with access to Keep services, typically behind a web proxy
-// that provides SSL support.
+// serves private data to clients that supply Arvados API tokens. It
+// can be installed anywhere with access to Keep services, typically
+// behind a web proxy that supports TLS.
 //
-// Given that this amounts to a web hosting service for arbitrary
-// content, it is vital to ensure that at least one of the following is
-// true:
+// Starting the server
 //
-// Usage
-//
-// Listening:
+// Serve HTTP requests at port 1234 on all interfaces:
 //
 //   keepdl -address=:1234
 //
-// Start an HTTP server on port 1234.
+// Serve HTTP requests at port 1234 on the interface with IP address 1.2.3.4:
 //
 //   keepdl -address=1.2.3.4:1234
 //
-// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+// Proxy configuration
 //
 // Keepdl does not support SSL natively. Typically, it is installed
 // behind a proxy like nginx.
 //
+// Here is an example nginx configuration.
+//
+//	http {
+//	  upstream keepdl {
+//	    server localhost:1234;
+//	  }
+//	  server {
+//	    listen *:443 ssl;
+//	    server_name dl.example.com *.dl.example.com ~.*--dl.example.com;
+//	    ssl_certificate /root/wildcard.example.com.crt;
+//	    ssl_certificate_key /root/wildcard.example.com.key;
+//	    location  / {
+//	      proxy_pass http://keepdl;
+//	      proxy_set_header Host $host;
+//	      proxy_set_header X-Forwarded-For $remote_addr;
+//	    }
+//	  }
+//	}
+//
+// It is not necessary to run keepdl on the same host as the nginx
+// proxy. However, TLS is not used between nginx and keepdl, so
+// intervening networks must be secured by other means.
+//
+// Download URLs
+//
+// The following "same origin" URL patterns are supported for public
+// collections (i.e., collections which can be served by keepdl
+// without making use of any credentials supplied by the client). See
+// "Same-origin mode" below.
+//
+//   http://dl.example.com/c=uuid_or_pdh/path/file.txt
+//   http://dl.example.com/c=uuid_or_pdh/path/t=TOKEN/file.txt
+//
+// The following "multiple origin" URL patterns are supported for all
+// collections:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=/path/file.txt
+//   http://uuid_or_pdh--dl.example.com/t=TOKEN/path/file.txt
+//
+// In the "multiple origin" form, the string "--" can be replaced with
+// "." with identical results (assuming the upstream proxy is
+// configured accordingly). These two are equivalent:
+//
+//   http://uuid_or_pdh--dl.example.com/path/file.txt
+//   http://uuid_or_pdh.dl.example.com/path/file.txt
+//
+// The first form minimizes the cost and effort of deploying a
+// wildcard TLS certificate for *.dl.example.com. The second form is
+// likely to be easier to configure, and more efficient to run, on an
+// upstream proxy.
+//
+// In all of the above forms, the "dl.example.com" part can be
+// anything at all.
+//
+// In all of the above forms, the "uuid_or_pdh" part can be either a
+// collection UUID or a portable data hash with the "+" character
+// replaced by "-".
+//
+// Assuming there is a collection with UUID
+// zzzzz-4zz18-znfnqtbbv4spc3w and portable data hash
+// 1f4b0bc7583c2a7f9102c395f4ffc5e3+45, the following URLs are
+// interchangeable:
+//
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w.dl.example.com/t=/foo
+//   http://zzzzz-4zz18-znfnqtbbv4spc3w--dl.example.com/t=/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--foo.example.com/foo
+//   http://1f4b0bc7583c2a7f9102c395f4ffc5e3-45--.invalid/foo
+//
+// Authorization mechanisms
+//
+// A token can be provided in an Authorization header:
+//
+//   Authorization: OAuth2 o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A base64-encoded token can be provided in a cookie named "api_token":
+//
+//   Cookie: api_token=bzA3ajRweDdSbEpLNEN1TVlwN0MwTERUNEN6UjFKMXFCRTVBdm83ZUNjVWpPVGlreEs=
+//
+// A token can be provided in a URL-encoded query string:
+//
+//   GET /foo.txt?api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// A suitably encoded token can be provided in a POST body if the
+// request has a content type of application/x-www-form-urlencoded or
+// multipart/form-data:
+//
+//   POST /foo.txt
+//   Content-Type: application/x-www-form-urlencoded
+//   [...]
+//   api_token=o07j4px7RlJK4CuMYp7C0LDT4CzR1J1qBE5Avo7eCcUjOTikxK
+//
+// If a token is provided in a query string or in a POST request, the
+// response is an HTTP 303 redirect to an equivalent GET request, with
+// the token stripped from the query string and added to a cookie
+// instead.
+//
+// Compatibility
+//
+// Client-provided authorization tokens are ignored if the client does
+// not provide a Host header.
+//
+// In order to use the query string or POST form authorization
+// mechanisms, the client must follow 303 redirects; the client must
+// accept cookies with a 303 response and send those cookies when
+// performing the redirect; and either the client or an intervening
+// proxy must resolve a relative URL ("//host/path") if given in a
+// response Location header.
+//
+// Intranet mode
+//
+// Normally, Keepdl accepts requests for multiple collections using
+// the same host name, provided the client's credentials are not being
+// used. This provides insufficient XSS protection in an installation
+// where the "anonymously accessible" data is not truly public, but
+// merely protected by network topology.
+//
+// In such cases -- for example, a site which is not reachable from
+// the internet, where some data is world-readable from Arvados's
+// perspective but is intended to be available only to users within
+// the local network -- the upstream proxy should be configured to return
+// 401 for all paths beginning with "/c=".
+//
+// Same-origin mode
+//
+// Without the same-origin protection outlined above, a web page
+// stored in collection X could execute JavaScript code that uses the
+// current viewer's credentials to download additional data from
+// collection Y -- data which is accessible to the current viewer, but
+// not to the author of collection X -- from the same origin
+// (``https://dl.example.com/'') and upload it to some other site
+// chosen by the author of collection X.
+//
 package main
 
 // TODO(TC): Implement
@@ -31,7 +161,7 @@ package main
 //
 // Normally, Keepdl is installed using a wildcard DNS entry and a
 // wildcard HTTPS certificate, serving data from collection X at
-// ``https://X.dl.example.com/path/file.ext''.
+// ``https://X--dl.example.com/path/file.ext''.
 //
 // It will also serve publicly accessible data at
 // ``https://dl.example.com/collections/X/path/file.txt'', but it does not
@@ -48,10 +178,4 @@ package main
 //
 //   keepdl -trust-all-content [...]
 //
-// In the general case, this should not be enabled: A web page stored
-// in collection X can execute JavaScript code that uses the current
-// viewer's credentials to download additional data -- data which is
-// accessible to the current viewer, but not to the author of
-// collection X -- from the same origin (``https://dl.example.com/'')
-// and upload it to some other site chosen by the author of collection
-// X.
+// In the general case, this should not be enabled: 
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 04af920..03b3e26 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -2,11 +2,14 @@ package main
 
 import (
 	"fmt"
+	"html"
 	"io"
 	"mime"
 	"net/http"
+	"net/url"
 	"os"
 	"strings"
+	"time"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
@@ -25,26 +28,49 @@ func init() {
 	anonymousTokens = []string{}
 }
 
+// Return the leading part of DNS name s (up to the first "." or "--") if it is a UUID or a PDH, otherwise ""
+func parseCollectionIdFromDNSName(s string) string {
+	// Strip domain.
+	if i := strings.IndexRune(s, '.'); i >= 0 {
+		s = s[:i]
+	}
+	// Names like {uuid}--dl.example.com serve the same purpose as
+	// {uuid}.dl.example.com but can reduce cost/effort of using
+	// [additional] wildcard certificates.
+	if i := strings.Index(s, "--"); i >= 0 {
+		s = s[:i]
+	}
+	if !arvadosclient.UUIDMatch(s) && !arvadosclient.PDHMatch(s) {
+		return ""
+	}
+	return s
+}
+
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-	var statusCode int
+	var statusCode = 0
 	var statusText string
 
 	w := httpserver.WrapResponseWriter(wOrig)
 	defer func() {
-		if statusCode > 0 {
-			if w.WroteStatus() == 0 {
-				w.WriteHeader(statusCode)
-			} else {
-				httpserver.Log(r.RemoteAddr, "WARNING",
-					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
-			}
+		if statusCode == 0 {
+			statusCode = w.WroteStatus()
+		} else if w.WroteStatus() == 0 {
+			w.WriteHeader(statusCode)
+		} else if w.WroteStatus() != statusCode {
+			httpserver.Log(r.RemoteAddr, "WARNING",
+				fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
 		}
 		if statusText == "" {
 			statusText = http.StatusText(statusCode)
 		}
-		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
 	}()
 
+	if r.Method != "GET" && r.Method != "POST" {
+		statusCode, statusText = http.StatusMethodNotAllowed, r.Method
+		return
+	}
+
 	arv := clientPool.Get()
 	if arv == nil {
 		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
@@ -54,17 +80,70 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
 	pathParts := strings.Split(r.URL.Path[1:], "/")
 
-	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
-		statusCode = http.StatusNotFound
-		return
-	}
-
 	var targetId string
 	var targetPath []string
 	var tokens []string
 	var reqTokens []string
 	var pathToken bool
-	if len(pathParts) >= 5 && pathParts[1] == "download" {
+
+	if targetId = parseCollectionIdFromDNSName(r.Host); targetId != "" {
+		// "http://{id}.domain.example.com/{path}" form
+		if t := r.FormValue("api_token"); t != "" {
+			// ...with explicit token in query string or
+			// form in POST body. We must encrypt the
+			// token such that it can only be used for
+			// this collection; put it in an HttpOnly
+			// cookie; and redirect to the same URL with
+			// the query param redacted, and method =
+			// GET.
+			//
+			// The HttpOnly flag is necessary to prevent
+			// JavaScript code (included in, or loaded by,
+			// a page in the collection being served) from
+			// employing the user's token beyond reading
+			// other files in the same domain, i.e., the
+			// same collection.
+			//
+			// The 303 redirect is necessary in the case
+			// of a GET request to avoid exposing the
+			// token in the Location bar, and in the case
+			// of a POST request to avoid raising warnings
+			// when the user refreshes the resulting page.
+			http.SetCookie(w, &http.Cookie{
+				Name:    "api_token",
+				Value:   auth.EncodeTokenCookie([]byte(t)),
+				Path:    "/",
+				Expires: time.Now().AddDate(10,0,0),
+			})
+			redir := (&url.URL{Host: r.Host, Path: r.URL.Path}).String()
+
+			w.Header().Add("Location", redir)
+			statusCode, statusText = http.StatusSeeOther, redir
+			w.WriteHeader(statusCode)
+			io.WriteString(w, `<A href="`)
+			io.WriteString(w, html.EscapeString(redir))
+			io.WriteString(w, `">Continue</A>`)
+			return
+		} else if strings.HasPrefix(pathParts[0], "t=") {
+			// ...with explicit token in path,
+			// "{...}.com/t={token}/{path}".  This form
+			// must only be used to pass scoped tokens
+			// that give permission for a single
+			// collection. See FormValue case above.
+			tokens = []string{pathParts[0][2:]}
+			targetPath = pathParts[1:]
+			pathToken = true
+		} else {
+			// ...with cookie, Authorization header, or
+			// no token at all
+			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+			tokens = append(reqTokens, anonymousTokens...)
+			targetPath = pathParts
+		}
+	} else if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	} else if len(pathParts) >= 5 && pathParts[1] == "download" {
 		// "/collections/download/{id}/{token}/path..." form:
 		// Don't use our configured anonymous tokens,
 		// Authorization headers, etc.  Just use the token in
@@ -86,7 +165,6 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	found := false
 	for _, arv.ApiToken = range tokens {
 		err := arv.Get("collections", targetId, nil, &collection)
-		httpserver.Log(err)
 		if err == nil {
 			// Success
 			found = true
@@ -131,8 +209,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		// someone trying (anonymously) to download public
 		// data that has been deleted.  Allow a referrer to
 		// provide this context somehow?
-		statusCode = http.StatusUnauthorized
 		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		statusCode = http.StatusUnauthorized
 		return
 	}
 
@@ -162,6 +240,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		}
 	}
 
+	w.WriteHeader(http.StatusOK)
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keepdl/handler_test.go b/services/keepdl/handler_test.go
new file mode 100644
index 0000000..a1f5e1a
--- /dev/null
+++ b/services/keepdl/handler_test.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+	"html"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&UnitSuite{})
+
+type UnitSuite struct {}
+
+func mustParseURL(s string) *url.URL {
+	r, err := url.Parse(s)
+	if err != nil {
+		panic("parse URL: " + s)
+	}
+	return r
+}
+
+func (s *IntegrationSuite) TestVhost404(c *check.C) {
+	for _, testURL := range []string{
+		arvadostest.NonexistentCollection + ".example.com/theperthcountyconspiracy",
+		arvadostest.NonexistentCollection + ".example.com/t=" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
+	} {
+		resp := httptest.NewRecorder()
+		req := &http.Request{
+			Method: "GET",
+			URL: mustParseURL(testURL),
+		}
+		(&handler{}).ServeHTTP(resp, req)
+		c.Check(resp.Code, check.Equals, http.StatusNotFound)
+		c.Check(resp.Body.String(), check.Equals, "")
+	}
+}
+
+type authorizer func(*http.Request, string) int
+
+func (s *IntegrationSuite) TestVhostViaAuthzHeader(c *check.C) {
+	doVhostRequests(c, authzViaAuthzHeader)
+}
+func authzViaAuthzHeader(r *http.Request, tok string) int {
+	r.Header.Add("Authorization", "OAuth2 " + tok)
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaCookieValue(c *check.C) {
+	doVhostRequests(c, authzViaCookieValue)
+}
+func authzViaCookieValue(r *http.Request, tok string) int {
+	r.AddCookie(&http.Cookie{
+		Name: "api_token",
+		Value: auth.EncodeTokenCookie([]byte(tok)),
+	})
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPath(c *check.C) {
+	doVhostRequests(c, authzViaPath)
+}
+func authzViaPath(r *http.Request, tok string) int {
+	r.URL.Path = "/t=" + tok + r.URL.Path
+	return http.StatusNotFound
+}
+
+func (s *IntegrationSuite) TestVhostViaQueryString(c *check.C) {
+	doVhostRequests(c, authzViaQueryString)
+}
+func authzViaQueryString(r *http.Request, tok string) int {
+	r.URL.RawQuery = "api_token=" + tok
+	return http.StatusUnauthorized
+}
+
+func (s *IntegrationSuite) TestVhostViaPOST(c *check.C) {
+	doVhostRequests(c, authzViaPOST)
+}
+func authzViaPOST(r *http.Request, tok string) int {
+	r.Method = "POST"
+	r.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	r.Body = ioutil.NopCloser(strings.NewReader(
+		url.Values{"api_token": {tok}}.Encode()))
+	return http.StatusUnauthorized
+}
+
+// Try some combinations of {url, token} using the given authorization
+// mechanism, and verify the result is correct.
+func doVhostRequests(c *check.C, authz authorizer) {
+	hostPath := arvadostest.FooCollection + ".example.com/foo"
+	for _, tok := range []string{
+		arvadostest.ActiveToken,
+		arvadostest.ActiveToken[:15],
+		arvadostest.SpectatorToken,
+		"bogus",
+		"",
+	} {
+		u := mustParseURL("http://" + hostPath)
+		req := &http.Request{
+			Method: "GET",
+			Host: u.Host,
+			URL: u,
+			Header: http.Header{},
+		}
+		failCode := authz(req, tok)
+		resp := doReq(req)
+		code, body := resp.Code, resp.Body.String()
+		if tok == arvadostest.ActiveToken {
+			c.Check(code, check.Equals, http.StatusOK)
+			c.Check(body, check.Equals, "foo")
+		} else {
+			c.Check(code >= 400, check.Equals, true)
+			c.Check(code < 500, check.Equals, true)
+			if tok == arvadostest.SpectatorToken {
+				// Valid token never offers to retry
+				// with different credentials.
+				c.Check(code, check.Equals, http.StatusNotFound)
+			} else {
+				// Invalid token can ask to retry
+				// depending on the authz method.
+				c.Check(code, check.Equals, failCode)
+			}
+			c.Check(body, check.Equals, "")
+		}
+	}
+}
+
+func doReq(req *http.Request) *httptest.ResponseRecorder {
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	if resp.Code != http.StatusSeeOther {
+		return resp
+	}
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+	u, _ := req.URL.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+	return doReq(req)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectQueryTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "GET",
+		arvadostest.FooCollection + ".example.com/foo",
+		"?api_token=" + arvadostest.ActiveToken,
+		"text/plain",
+		"",
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.ActiveToken}}.Encode(),
+		http.StatusOK,
+	)
+}
+
+func (s *IntegrationSuite) TestVhostRedirectPOSTFormTokenToCookie404(c *check.C) {
+	s.testVhostRedirectTokenToCookie(c, "POST",
+		arvadostest.FooCollection + ".example.com/foo",
+		"",
+		"application/x-www-form-urlencoded",
+		url.Values{"api_token": {arvadostest.SpectatorToken}}.Encode(),
+		http.StatusNotFound,
+	)
+}
+
+func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, hostPath, queryString, contentType, body string, expectStatus int) {
+	u, _ := url.Parse(`http://` + hostPath + queryString)
+	req := &http.Request{
+		Method: method,
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{"Content-Type": {contentType}},
+		Body: ioutil.NopCloser(strings.NewReader(body)),
+	}
+
+	resp := httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Assert(resp.Code, check.Equals, http.StatusSeeOther)
+	c.Check(resp.Body.String(), check.Matches, `.*href="//` + regexp.QuoteMeta(html.EscapeString(hostPath)) + `".*`)
+	cookies := (&http.Response{Header: resp.Header()}).Cookies()
+
+	u, _ = u.Parse(resp.Header().Get("Location"))
+	req = &http.Request{
+		Method: "GET",
+		Host: u.Host,
+		URL: u,
+		Header: http.Header{},
+	}
+	for _, c := range cookies {
+		req.AddCookie(c)
+	}
+
+	resp = httptest.NewRecorder()
+	(&handler{}).ServeHTTP(resp, req)
+	c.Check(resp.Header().Get("Location"), check.Equals, "")
+	c.Check(resp.Code, check.Equals, expectStatus)
+	if expectStatus == http.StatusOK {
+		c.Check(resp.Body.String(), check.Equals, "foo")
+	}
+}

commit 5be8366eb525129c9b6d2b710365cf4dbc9392c0
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 24 23:33:08 2015 -0400

    5824: add (*KeepClient)CollectionFileReader()

diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
new file mode 100644
index 0000000..87b28f8
--- /dev/null
+++ b/sdk/go/arvadostest/fixtures.go
@@ -0,0 +1,17 @@
+package arvadostest
+
+const (
+	SpectatorToken        = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	ActiveToken           = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	AnonymousToken        = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	FooCollection         = "zzzzz-4zz18-fy296fx3hot09f7"
+	NonexistentCollection = "zzzzz-4zz18-totallynotexist"
+	HelloWorldCollection  = "zzzzz-4zz18-4en62shvi99lxd4"
+	PathologicalManifest  = ". acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 73feffa4b7f6bb68e44cf984c85f6e88+3+Z+K at xyzzy acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero at 0 0:1:f 1:0:zero at 1 1:4:ooba 4:0:zero at 4 5:1:r 5:4:rbaz 9:0:zero at 9\n" +
+		"./overlapReverse acbd18db4cc2f85cedef654fccc4a4d8+3 acbd18db4cc2f85cedef654fccc4a4d8+3 5:1:o 4:2:oo 2:4:ofoo\n" +
+		"./segmented acbd18db4cc2f85cedef654fccc4a4d8+3 37b51d194a7513e45b56f6524f2d51f2+3 0:1:frob 5:1:frob 1:1:frob 1:2:oof 0:1:oof 5:0:frob 3:1:frob\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:baz` + "\n" +
+		`./foo\040b\141r acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:b\141z\040w\141z` + "\n" +
+		"./foo acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:zero 0:3:foo\n" +
+		". acbd18db4cc2f85cedef654fccc4a4d8+3 0:0:foo/zero 0:3:foo/foo\n"
+)
diff --git a/sdk/go/keepclient/collectionreader.go b/sdk/go/keepclient/collectionreader.go
new file mode 100644
index 0000000..929f693
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader.go
@@ -0,0 +1,159 @@
+package keepclient
+
+import (
+	"errors"
+	"io"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+var (
+	ErrNoManifest     = errors.New("Collection has no manifest")
+	ErrNotImplemented = errors.New("Not implemented")
+)
+
+// CollectionFileReader returns an io.Reader that reads file content
+// from a collection. The filename must be given relative to the root
+// of the collection, without a leading "./".
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (*cfReader, error) {
+	mText, ok := collection["manifest_text"].(string)
+	if !ok {
+		return nil, ErrNoManifest
+	}
+	m := manifest.Manifest{Text: mText}
+	rdrChan := make(chan *cfReader)
+	go func() {
+		// q is a queue of FileSegments that we have received but
+		// haven't yet been able to send to toGet.
+		var q []*manifest.FileSegment
+		var r *cfReader
+		for seg := range m.FileSegmentIterByName(filename) {
+			if r == nil {
+				// We've just discovered that the
+				// requested filename does appear in
+				// the manifest, so we can return a
+				// real reader (not nil) from
+				// CollectionFileReader().
+				r = newCFReader(kc)
+				rdrChan <- r
+			}
+			q = append(q, seg)
+			r.totalSize += uint64(seg.Len)
+			// Send toGet whatever it's ready to receive.
+			Q: for len(q) > 0 {
+				select {
+				case r.toGet <- q[0]:
+					q = q[1:]
+				default:
+					break Q
+				}
+			}
+		}
+		if r == nil {
+			// File not found
+			rdrChan <- nil
+			return
+		}
+		close(r.countDone)
+		for _, seg := range q {
+			r.toGet <- seg
+		}
+		close(r.toGet)
+	}()
+	// Before returning a reader, wait until we know whether the
+	// file exists here:
+	r := <-rdrChan
+	if r == nil {
+		return nil, os.ErrNotExist
+	}
+	return r, nil
+}
+
+type cfReader struct {
+	keepClient *KeepClient
+	// doGet() reads FileSegments from toGet, gets the data from
+	// Keep, and sends byte slices to toRead to be consumed by
+	// Read().
+	toGet        chan *manifest.FileSegment
+	toRead       chan []byte
+	// bytes ready to send next time someone calls Read()
+	buf          []byte
+	// Total size of the file being read. Not safe to read this
+	// until countDone is closed.
+	totalSize    uint64
+	countDone    chan struct{}
+	// First error encountered.
+	err          error
+}
+
+func (r *cfReader) Read(outbuf []byte) (n int, err error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for r.buf == nil || len(r.buf) == 0 {
+		var ok bool
+		r.buf, ok = <-r.toRead
+		if r.err != nil {
+			return 0, r.err
+		} else if !ok {
+			return 0, io.EOF
+		}
+	}
+	if len(r.buf) > len(outbuf) {
+		n = len(outbuf)
+	} else {
+		n = len(r.buf)
+	}
+	copy(outbuf[:n], r.buf[:n])
+	r.buf = r.buf[n:]
+	return
+}
+
+func (r *cfReader) Close() error {
+	_, _ = <-r.countDone
+	for _ = range r.toGet {
+	}
+	for _ = range r.toRead {
+	}
+	return r.err
+}
+
+func (r *cfReader) Len() uint64 {
+	// Wait for all segments to be counted
+	_, _ = <-r.countDone
+	return r.totalSize
+}
+
+func (r *cfReader) doGet() {
+	defer close(r.toRead)
+	for fs := range r.toGet {
+		rdr, _, _, err := r.keepClient.Get(fs.Locator)
+		if err != nil {
+			r.err = err
+			return
+		}
+		var buf = make([]byte, fs.Offset+fs.Len)
+		_, err = io.ReadFull(rdr, buf)
+		if err != nil {
+			r.err = err
+			return
+		}
+		for bOff, bLen := fs.Offset, 1<<20; bOff <= fs.Offset+fs.Len && bLen > 0; bOff += bLen {
+			if bOff+bLen > fs.Offset+fs.Len {
+				bLen = fs.Offset + fs.Len - bOff
+			}
+			r.toRead <- buf[bOff : bOff+bLen]
+		}
+	}
+}
+
+func newCFReader(kc *KeepClient) (r *cfReader) {
+	r = new(cfReader)
+	r.keepClient = kc
+	r.toGet = make(chan *manifest.FileSegment, 2)
+	r.toRead = make(chan []byte)
+	r.countDone = make(chan struct{})
+	go r.doGet()
+	return
+}
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
new file mode 100644
index 0000000..f271208
--- /dev/null
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -0,0 +1,123 @@
+package keepclient
+
+import (
+	"crypto/md5"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+// IntegrationSuite tests need an API server
+type IntegrationSuite struct{}
+
+type SuccessHandler struct {
+	disk map[string][]byte
+	lock chan struct{}
+}
+
+func (h SuccessHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	switch req.Method {
+	case "PUT":
+		buf, err := ioutil.ReadAll(req.Body)
+		if err != nil {
+			resp.WriteHeader(500)
+			return
+		}
+		pdh := fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf))
+		h.lock <- struct{}{}
+		h.disk[pdh] = buf
+		<- h.lock
+		resp.Write([]byte(pdh))
+	case "GET":
+		pdh := req.URL.Path[1:]
+		h.lock <- struct{}{}
+		buf, ok := h.disk[pdh]
+		<- h.lock
+		if !ok {
+			resp.WriteHeader(http.StatusNotFound)
+		} else {
+			resp.Write(buf)
+		}
+	default:
+		resp.WriteHeader(http.StatusMethodNotAllowed)
+	}
+}
+
+type rdrTest struct {
+	mt   string      // manifest text
+	f    string      // filename
+	want interface{} // error or string to expect
+}
+
+func (s *ServerRequiredSuite) TestCollectionReaderContent(c *check.C) {
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.IsNil)
+	arv.ApiToken = arvadostest.ActiveToken
+
+	kc, err := MakeKeepClient(&arv)
+	c.Assert(err, check.IsNil)
+
+	{
+		localRoots := make(map[string]string)
+		h := SuccessHandler{disk: make(map[string][]byte), lock: make(chan struct{}, 1)}
+		for i, k := range RunSomeFakeKeepServers(h, 4) {
+			localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+		}
+		kc.SetServiceRoots(localRoots, localRoots, nil)
+		kc.PutB([]byte("foo"))
+		kc.PutB([]byte("bar"))
+		kc.PutB([]byte("Hello world\n"))
+		kc.PutB([]byte(""))
+	}
+
+	mt := arvadostest.PathologicalManifest
+
+	for _, testCase := range []rdrTest{
+		{mt: mt, f: "zzzz", want: os.ErrNotExist},
+		{mt: mt, f: "frob", want: os.ErrNotExist},
+		{mt: mt, f: "/segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "./segmented/frob", want: os.ErrNotExist},
+		{mt: mt, f: "/f", want: os.ErrNotExist},
+		{mt: mt, f: "./f", want: os.ErrNotExist},
+		{mt: mt, f: "foo bar//baz", want: os.ErrNotExist},
+		{mt: mt, f: "foo/zero", want: ""},
+		{mt: mt, f: "zero at 0", want: ""},
+		{mt: mt, f: "zero at 1", want: ""},
+		{mt: mt, f: "zero at 4", want: ""},
+		{mt: mt, f: "zero at 9", want: ""},
+		{mt: mt, f: "f", want: "f"},
+		{mt: mt, f: "ooba", want: "ooba"},
+		{mt: mt, f: "overlapReverse/o", want: "o"},
+		{mt: mt, f: "overlapReverse/oo", want: "oo"},
+		{mt: mt, f: "overlapReverse/ofoo", want: "ofoo"},
+		{mt: mt, f: "foo bar/baz", want: "foo"},
+		{mt: mt, f: "segmented/frob", want: "frob"},
+		{mt: mt, f: "segmented/oof", want: "oof"},
+	} {
+		rdr, err := kc.CollectionFileReader(map[string]interface{}{"manifest_text": testCase.mt}, testCase.f)
+		switch want := testCase.want.(type) {
+		case error:
+			c.Check(rdr, check.IsNil)
+			c.Check(err, check.Equals, want)
+		case string:
+			buf := make([]byte, len(want))
+			n, err := io.ReadFull(rdr, buf)
+			c.Check(err, check.IsNil)
+			for i := 0; i < 4; i++ {
+				c.Check(string(buf), check.Equals, want)
+				n, err = rdr.Read(buf)
+				c.Check(n, check.Equals, 0)
+				c.Check(err, check.Equals, io.EOF)
+			}
+			c.Check(rdr.Close(), check.Equals, nil)
+		}
+	}
+}
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index 4e816cd..f104d9a 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -5,25 +5,185 @@
 package manifest
 
 import (
+	"errors"
+	"fmt"
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"log"
+	"regexp"
+	"strconv"
 	"strings"
 )
 
+var ErrInvalidToken = errors.New("Invalid token")
+
+var LocatorPattern = regexp.MustCompile(
+	"^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9 at _-]+)*$")
+
 type Manifest struct {
 	Text string
 }
 
+type BlockLocator struct {
+	Digest blockdigest.BlockDigest
+	Size   int
+	Hints  []string
+}
+
+type DataSegment struct {
+	BlockLocator
+	Locator      string
+	StreamOffset uint64
+}
+
+// FileSegment is a portion of a file that is contained within a
+// single block.
+type FileSegment struct {
+	Locator string
+	// Offset (within this block) of this data segment
+	Offset int
+	Len    int
+}
+
 // Represents a single line from a manifest.
 type ManifestStream struct {
 	StreamName string
 	Blocks     []string
-	Files      []string
+	FileTokens []string
+}
+
+var escapeSeq = regexp.MustCompile(`\\([0-9]{3}|\\)`)
+
+func unescapeSeq(seq string) string {
+	if seq == `\\` {
+		return `\`
+	}
+	i, err := strconv.ParseUint(seq[1:], 8, 8)
+	if err != nil {
+		// Invalid escape sequence: can't unescape.
+		return seq
+	}
+	return string([]byte{byte(i)})
+}
+
+func UnescapeName(s string) string {
+	return escapeSeq.ReplaceAllStringFunc(s, unescapeSeq)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+	if !LocatorPattern.MatchString(s) {
+		err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+			"\"%s\".",
+			s,
+			LocatorPattern.String())
+	} else {
+		tokens := strings.Split(s, "+")
+		var blockSize int64
+		var blockDigest blockdigest.BlockDigest
+		// We expect both of the following to succeed since LocatorPattern
+		// restricts the strings appropriately.
+		blockDigest, err = blockdigest.FromString(tokens[0])
+		if err != nil {
+			return
+		}
+		blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+		if err != nil {
+			return
+		}
+		b.Digest = blockDigest
+		b.Size = int(blockSize)
+		b.Hints = tokens[2:]
+	}
+	return
+}
+
+func parseFileToken(tok string) (segPos, segLen uint64, name string, err error) {
+	parts := strings.SplitN(tok, ":", 3)
+	if len(parts) != 3 {
+		err = ErrInvalidToken
+		return
+	}
+	segPos, err = strconv.ParseUint(parts[0], 10, 64)
+	if err != nil {
+		return
+	}
+	segLen, err = strconv.ParseUint(parts[1], 10, 64)
+	if err != nil {
+		return
+	}
+	name = UnescapeName(parts[2])
+	return
+}
+
+func (s *ManifestStream) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		s.sendFileSegmentIterByName(filepath, ch)
+		close(ch)
+	}()
+	return ch
+}
+
+func (s *ManifestStream) sendFileSegmentIterByName(filepath string, ch chan<- *FileSegment) {
+	blockLens := make([]int, 0, len(s.Blocks))
+	// This is what streamName+"/"+fileName will look like:
+	target := "./" + filepath
+	for _, fTok := range s.FileTokens {
+		wantPos, wantLen, name, err := parseFileToken(fTok)
+		if err != nil {
+			// Skip (!) invalid file tokens.
+			continue
+		}
+		if s.StreamName+"/"+name != target {
+			continue
+		}
+		if wantLen == 0 {
+			ch <- &FileSegment{Locator: "d41d8cd98f00b204e9800998ecf8427e+0", Offset: 0, Len: 0}
+			continue
+		}
+		// Linear search for blocks containing data for this
+		// file
+		var blockPos uint64 = 0 // position of block in stream
+		for i, loc := range s.Blocks {
+			if blockPos >= wantPos+wantLen {
+				break
+			}
+			if len(blockLens) <= i {
+				blockLens = blockLens[:i+1]
+				b, err := ParseBlockLocator(loc)
+				if err != nil {
+					// Unparseable locator -> unusable
+					// stream.
+					ch <- nil
+					return
+				}
+				blockLens[i] = b.Size
+			}
+			blockLen := uint64(blockLens[i])
+			if blockPos+blockLen <= wantPos {
+				blockPos += blockLen
+				continue
+			}
+			fseg := FileSegment{
+				Locator: loc,
+				Offset:  0,
+				Len:     blockLens[i],
+			}
+			if blockPos < wantPos {
+				fseg.Offset = int(wantPos - blockPos)
+				fseg.Len -= fseg.Offset
+			}
+			if blockPos+blockLen > wantPos+wantLen {
+				fseg.Len = int(wantPos+wantLen-blockPos) - fseg.Offset
+			}
+			ch <- &fseg
+			blockPos += blockLen
+		}
+	}
 }
 
 func parseManifestStream(s string) (m ManifestStream) {
 	tokens := strings.Split(s, " ")
-	m.StreamName = tokens[0]
+	m.StreamName = UnescapeName(tokens[0])
 	tokens = tokens[1:]
 	var i int
 	for i = range tokens {
@@ -32,7 +192,7 @@ func parseManifestStream(s string) (m ManifestStream) {
 		}
 	}
 	m.Blocks = tokens[:i]
-	m.Files = tokens[i:]
+	m.FileTokens = tokens[i:]
 	return
 }
 
@@ -58,6 +218,20 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 	return ch
 }
 
+func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
+	ch := make(chan *FileSegment)
+	go func() {
+		for stream := range m.StreamIter() {
+			if !strings.HasPrefix("./"+filepath, stream.StreamName+"/") {
+				continue
+			}
+			stream.sendFileSegmentIterByName(filepath, ch)
+		}
+		close(ch)
+	}()
+	return ch
+}
+
 // Blocks may appear mulitple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
diff --git a/sdk/go/manifest/manifest_test.go b/sdk/go/manifest/manifest_test.go
index 8cfe3d9..364648d 100644
--- a/sdk/go/manifest/manifest_test.go
+++ b/sdk/go/manifest/manifest_test.go
@@ -1,10 +1,13 @@
 package manifest
 
 import (
-	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"io/ioutil"
+	"reflect"
 	"runtime"
 	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 )
 
 func getStackTrace() string {
@@ -60,7 +63,7 @@ func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
 func expectManifestStream(t *testing.T, actual ManifestStream, expected ManifestStream) {
 	expectEqual(t, actual.StreamName, expected.StreamName)
 	expectStringSlicesEqual(t, actual.Blocks, expected.Blocks)
-	expectStringSlicesEqual(t, actual.Files, expected.Files)
+	expectStringSlicesEqual(t, actual.FileTokens, expected.FileTokens)
 }
 
 func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
@@ -72,8 +75,19 @@ func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected
 func TestParseManifestStreamSimple(t *testing.T) {
 	m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
 	expectManifestStream(t, m, ManifestStream{StreamName: ".",
-		Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-		Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+		Blocks:     []string{"365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
+		FileTokens: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+	b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K at qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+	if err != nil {
+		t.Fatalf("Unexpected error parsing block locator: %v", err)
+	}
+	expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
+		Size: 2310,
+		Hints: []string{"K at qr1hi",
+			"Af0c9a66381f3b028677411926f0be1c6282fe67c at 542b5ddf"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -88,8 +102,8 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
 	expectManifestStream(t,
 		firstStream,
 		ManifestStream{StreamName: ".",
-			Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475 at 5441920c"},
-			Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+			Blocks:     []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475 at 5441920c"},
+			FileTokens: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
 	received, ok := <-streamIter
 	if ok {
@@ -126,3 +140,58 @@ func TestBlockIterLongManifest(t *testing.T) {
 			Size:  31367794,
 			Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db at 5441920c"}})
 }
+
+func TestUnescape(t *testing.T) {
+	for _, testCase := range [][]string{
+		{`\040`, ` `},
+		{`\009`, `\009`},
+		{`\\\040\\`, `\ \`},
+		{`\\040\`, `\040\`},
+	} {
+		in := testCase[0]
+		expect := testCase[1]
+		got := UnescapeName(in)
+		if expect != got {
+			t.Errorf("For '%s' got '%s' instead of '%s'", in, got, expect)
+		}
+	}
+}
+
+type fsegtest struct {
+	mt   string        // manifest text
+	f    string        // filename
+	want []FileSegment // segments should be received on channel
+}
+
+func TestFileSegmentIterByName(t *testing.T) {
+	mt := arvadostest.PathologicalManifest
+	for _, testCase := range []fsegtest{
+		{mt: mt, f: "zzzz", want: nil},
+		// This case is too sensitive: it would be acceptable
+		// (even preferable) to return only one empty segment.
+		{mt: mt, f: "foo/zero", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 0", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 1", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 4", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "zero at 9", want: []FileSegment{{"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}}},
+		{mt: mt, f: "f", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+		{mt: mt, f: "ooba", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 2}}},
+		{mt: mt, f: "overlapReverse/o", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}}},
+		{mt: mt, f: "overlapReverse/oo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}}},
+		{mt: mt, f: "overlapReverse/ofoo", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		{mt: mt, f: "foo bar/baz", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 3}}},
+		// This case is too sensitive: it would be better to
+		// omit the empty segment.
+		{mt: mt, f: "segmented/frob", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}, {"37b51d194a7513e45b56f6524f2d51f2+3", 2, 1}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 1}, {"d41d8cd98f00b204e9800998ecf8427e+0", 0, 0}, {"37b51d194a7513e45b56f6524f2d51f2+3", 0, 1}}},
+		{mt: mt, f: "segmented/oof", want: []FileSegment{{"acbd18db4cc2f85cedef654fccc4a4d8+3", 1, 2}, {"acbd18db4cc2f85cedef654fccc4a4d8+3", 0, 1}}},
+	} {
+		m := Manifest{Text: testCase.mt}
+		var got []FileSegment
+		for fs := range m.FileSegmentIterByName(testCase.f) {
+			got = append(got, *fs)
+		}
+		if !reflect.DeepEqual(got, testCase.want) {
+			t.Errorf("For %#v:\n got  %#v\n want %#v", testCase.f, got, testCase.want)
+		}
+	}
+}
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index 48e3640..04af920 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -11,6 +11,7 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
 var clientPool = arvadosclient.MakeClientPool()
@@ -136,17 +137,20 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 	}
 
 	filename := strings.Join(targetPath, "/")
-	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	kc, err := keepclient.MakeKeepClient(arv)
+	if err != nil {
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	rdr, err := kc.CollectionFileReader(collection, filename)
 	if os.IsNotExist(err) {
 		statusCode = http.StatusNotFound
 		return
-	} else if err == arvadosclient.ErrNotImplemented {
-		statusCode = http.StatusNotImplemented
-		return
 	} else if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+	defer rdr.Close()
 
 	// One or both of these can be -1 if not found:
 	basenamePos := strings.LastIndex(filename, "/")
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 66c6812..5864315 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -15,16 +15,7 @@ import (
 
 var _ = check.Suite(&IntegrationSuite{})
 
-const (
-	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
-	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
-	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
-	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
-	bogusCollection = "zzzzz-4zz18-totallynotexist"
-	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
-)
-
-// IntegrationSuite tests need an API server and an arv-git-httpd server
+// IntegrationSuite tests need an API server and a keepdl server
 type IntegrationSuite struct {
 	testServer *server
 }
@@ -34,12 +25,12 @@ func (s *IntegrationSuite) TestNoToken(c *check.C) {
 		"",
 		"bogustoken",
 	} {
-		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		hdr, body := s.runCurl(c, token, "/collections/"+arvadostest.FooCollection+"/foo")
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
 		c.Check(body, check.Equals, "")
 
 		if token != "" {
-			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			hdr, body = s.runCurl(c, token, "/collections/download/"+arvadostest.FooCollection+"/"+token+"/foo")
 			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
 			c.Check(body, check.Equals, "")
 		}
@@ -62,46 +53,46 @@ func (s *IntegrationSuite) Test404(c *check.C) {
 		"/download",
 		"/collections",
 		"/collections/",
-		"/collections/" + fooCollection,
-		"/collections/" + fooCollection + "/",
+		"/collections/" + arvadostest.FooCollection,
+		"/collections/" + arvadostest.FooCollection + "/",
 		// Non-existent file in collection
-		"/collections/" + fooCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.FooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 		// Non-existent collection
-		"/collections/" + bogusCollection,
-		"/collections/" + bogusCollection + "/",
-		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
-		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		"/collections/" + arvadostest.NonexistentCollection,
+		"/collections/" + arvadostest.NonexistentCollection + "/",
+		"/collections/" + arvadostest.NonexistentCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + arvadostest.NonexistentCollection + "/" + arvadostest.ActiveToken + "/theperthcountyconspiracy",
 	} {
-		hdr, body := s.runCurl(c, activeToken, uri)
+		hdr, body := s.runCurl(c, arvadostest.ActiveToken, uri)
 		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
 		c.Check(body, check.Equals, "")
 	}
 }
 
 func (s *IntegrationSuite) Test200(c *check.C) {
-	anonymousTokens = []string{anonymousToken}
+	anonymousTokens = []string{arvadostest.AnonymousToken}
 	arv, err := arvadosclient.MakeArvadosClient()
 	c.Assert(err, check.Equals, nil)
-	arv.ApiToken = activeToken
+	arv.ApiToken = arvadostest.ActiveToken
 	kc, err := keepclient.MakeKeepClient(&arv)
 	c.Assert(err, check.Equals, nil)
 	kc.PutB([]byte("Hello world\n"))
 	kc.PutB([]byte("foo"))
 	for _, spec := range [][]string{
 		// My collection
-		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
-		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.FooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.ActiveToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{arvadostest.AnonymousToken, "/collections/download/" + arvadostest.FooCollection + "/" + arvadostest.ActiveToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
 		// Anonymously accessible user agreement. These should
 		// start working when CollectionFileReader provides
 		// real data instead of fake/stub data.
-		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
-		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{"", "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.ActiveToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/" + arvadostest.HelloWorldCollection + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{arvadostest.SpectatorToken, "/collections/download/" + arvadostest.HelloWorldCollection + "/" + arvadostest.SpectatorToken + "/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
 	} {
 		hdr, body := s.runCurl(c, spec[0], spec[1])
 		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {

commit e7f7945dfc2d3ec30e0cef4f781cd47c31287efa
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 01:09:02 2015 -0400

    5824: Add doc.go

diff --git a/services/keepdl/doc.go b/services/keepdl/doc.go
new file mode 100644
index 0000000..65c7f19
--- /dev/null
+++ b/services/keepdl/doc.go
@@ -0,0 +1,57 @@
+// Keepdl provides read-only HTTP access to files stored in Keep. It
+// serves public data to anonymous and unauthenticated clients, and
+// accepts authentication via Arvados tokens. It can be installed
+// anywhere with access to Keep services, typically behind a web proxy
+// that provides SSL support.
+//
+// Given that this amounts to a web hosting service for arbitrary
+// content, it is vital to understand the trust implications described
+// in the "Trusted content" notes at the end of this file.
+//
+// Usage
+//
+// Listening:
+//
+//   keepdl -address=:1234
+//
+// Start an HTTP server on port 1234.
+//
+//   keepdl -address=1.2.3.4:1234
+//
+// Start an HTTP server on port 1234, on the interface with IP address 1.2.3.4.
+//
+// Keepdl does not support SSL natively. Typically, it is installed
+// behind a proxy like nginx.
+//
+package main
+
+// TODO(TC): Implement
+//
+// Trusted content
+//
+// Normally, Keepdl is installed using a wildcard DNS entry and a
+// wildcard HTTPS certificate, serving data from collection X at
+// ``https://X.dl.example.com/path/file.ext''.
+//
+// It will also serve publicly accessible data at
+// ``https://dl.example.com/collections/X/path/file.txt'', but it does not
+// accept any kind of credentials at paths like these.
+//
+// In "trust all content" mode, Keepdl will accept credentials (API
+// tokens) and serve any collection X at
+// "https://dl.example.com/collections/X/path/file.ext".  This is
+// UNSAFE except in the special case where everyone who is able to write
+// ANY data to Keep, and every JavaScript and HTML file written to
+// Keep, is also trusted to read ALL of the data in Keep.
+//
+// In such cases you can enable trust-all-content mode.
+//
+//   keepdl -trust-all-content [...]
+//
+// In the general case, this should not be enabled: A web page stored
+// in collection X can execute JavaScript code that uses the current
+// viewer's credentials to download additional data -- data which is
+// accessible to the current viewer, but not to the author of
+// collection X -- from the same origin (``https://dl.example.com/'')
+// and upload it to some other site chosen by the author of collection
+// X.

commit 26ffcd38fd6b861a9005959ac016e826d30b8904
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Jun 23 19:12:58 2015 -0400

    5824: Add install doc

diff --git a/doc/install/install-keepdl.html.textile.liquid b/doc/install/install-keepdl.html.textile.liquid
new file mode 100644
index 0000000..6730dff
--- /dev/null
+++ b/doc/install/install-keepdl.html.textile.liquid
@@ -0,0 +1,64 @@
+---
+layout: default
+navsection: installguide
+title: Install download server
+...
+
+This installation guide assumes you are on a 64-bit Debian or Ubuntu system.
+
+The keepdl server provides read-only HTTP access to files stored in Keep. It serves public data to anonymous and unauthenticated clients, and accepts authentication via Arvados tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides SSL support.
+
+By convention, we use the following hostname for the download service:
+
+<div class="offset1">
+table(table table-bordered table-condensed).
+|dl. at uuid_prefix@.your.domain|
+</div>
+
+This hostname should resolve from anywhere on the internet.
+
+h2. Install keepdl
+
+First add the Arvados apt repository, and then install the keepdl package.
+
+<notextile>
+<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
+~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
+~$ <span class="userinput">sudo /usr/bin/apt-get install keepdl</span>
+</code></pre>
+</notextile>
+
+Verify that @keepdl@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">keepdl -h</span>
+Usage of keepdl:
+  -address="0.0.0.0:80": Address to listen on, "host:port".
+</code></pre>
+</notextile>
+
+We recommend running @keepdl@ under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+
+Your @run@ script should look something like this:
+
+<notextile>
+<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
+exec sudo -u nobody keepdl -address=:9002 2>&1
+</code></pre>
+</notextile>
+
+h3. Set up a reverse proxy with SSL support
+
+The keepdl service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of keepdl, running on port 443 and passing requests to keepdl on port 9002 (or whatever port you chose in your run script).
+
+h3. Tell the API server about the keepdl service
+
+In your API server's config/application.yml file, add the following entry:
+
+<notextile>
+<pre><code>keepdl: dl.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>

commit 374ce34bc1f1e8bca18c12f70d7de8dc8c5b60b6
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Jun 17 02:47:49 2015 -0400

    5824: Assign MIME type by file extension. closes #6327

diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
index bbcd53c..48e3640 100644
--- a/services/keepdl/handler.go
+++ b/services/keepdl/handler.go
@@ -3,6 +3,7 @@ package main
 import (
 	"fmt"
 	"io"
+	"mime"
 	"net/http"
 	"os"
 	"strings"
@@ -146,6 +147,17 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
 		return
 	}
+
+	// One or both of these can be -1 if not found:
+	basenamePos := strings.LastIndex(filename, "/")
+	extPos := strings.LastIndex(filename, ".")
+	if extPos > basenamePos {
+		// Now extPos is safely >= 0.
+		if t := mime.TypeByExtension(filename[extPos:]); t != "" {
+			w.Header().Set("Content-Type", t)
+		}
+	}
+
 	_, err = io.Copy(w, rdr)
 	if err != nil {
 		statusCode, statusText = http.StatusBadGateway, err.Error()
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
index 1c36f98..66c6812 100644
--- a/services/keepdl/server_test.go
+++ b/services/keepdl/server_test.go
@@ -109,6 +109,12 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 			continue
 		}
 		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		if strings.HasSuffix(spec[1], ".txt") {
+			c.Check(hdr, check.Matches, `(?s).*\r\nContent-Type: text/plain.*`)
+			// TODO: Check some types that aren't
+			// automatically detected by Go's http server
+			// by sniffing the content.
+		}
 		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
 	}
 }

commit 59976e5583eda9ff1bf35cb299165829829c5b5d
Author: Tom Clegg <tom at curoverse.com>
Date:   Thu Jul 23 00:02:11 2015 -0400

    5824: Add keepdl.

diff --git a/services/keepdl/.gitignore b/services/keepdl/.gitignore
new file mode 100644
index 0000000..173e306
--- /dev/null
+++ b/services/keepdl/.gitignore
@@ -0,0 +1 @@
+keepdl
diff --git a/services/keepdl/handler.go b/services/keepdl/handler.go
new file mode 100644
index 0000000..bbcd53c
--- /dev/null
+++ b/services/keepdl/handler.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var clientPool = arvadosclient.MakeClientPool()
+
+var anonymousTokens []string
+
+type handler struct{}
+
+func init() {
+	// TODO(TC): Get anonymousTokens from flags
+	anonymousTokens = []string{}
+}
+
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+	var statusCode int
+	var statusText string
+
+	w := httpserver.WrapResponseWriter(wOrig)
+	defer func() {
+		if statusCode > 0 {
+			if w.WroteStatus() == 0 {
+				w.WriteHeader(statusCode)
+			} else {
+				httpserver.Log(r.RemoteAddr, "WARNING",
+					fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+			}
+		}
+		if statusText == "" {
+			statusText = http.StatusText(statusCode)
+		}
+		httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+	}()
+
+	arv := clientPool.Get()
+	if arv == nil {
+		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+		return
+	}
+	defer clientPool.Put(arv)
+
+	pathParts := strings.Split(r.URL.Path[1:], "/")
+
+	if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+		statusCode = http.StatusNotFound
+		return
+	}
+
+	var targetId string
+	var targetPath []string
+	var tokens []string
+	var reqTokens []string
+	var pathToken bool
+	if len(pathParts) >= 5 && pathParts[1] == "download" {
+		// "/collections/download/{id}/{token}/path..." form:
+		// Don't use our configured anonymous tokens,
+		// Authorization headers, etc.  Just use the token in
+		// the path.
+		targetId = pathParts[2]
+		tokens = []string{pathParts[3]}
+		targetPath = pathParts[4:]
+		pathToken = true
+	} else {
+		// "/collections/{id}/path..." form
+		targetId = pathParts[1]
+		reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+		tokens = append(reqTokens, anonymousTokens...)
+		targetPath = pathParts[2:]
+	}
+
+	tokenResult := make(map[string]int)
+	collection := make(map[string]interface{})
+	found := false
+	for _, arv.ApiToken = range tokens {
+		err := arv.Get("collections", targetId, nil, &collection)
+		httpserver.Log(err)
+		if err == nil {
+			// Success
+			found = true
+			break
+		}
+		if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+			switch srvErr.HttpStatusCode {
+			case 404, 401:
+				// Token broken or insufficient to
+				// retrieve collection
+				tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+				continue
+			}
+		}
+		// Something more serious is wrong
+		statusCode, statusText = http.StatusInternalServerError, err.Error()
+		return
+	}
+	if !found {
+		if pathToken {
+			// The URL is a "secret sharing link", but it
+			// didn't work out. Asking the client for
+			// additional credentials would just be
+			// confusing.
+			statusCode = http.StatusNotFound
+			return
+		}
+		for _, t := range reqTokens {
+			if tokenResult[t] == 404 {
+				// The client provided valid token(s), but the
+				// collection was not found.
+				statusCode = http.StatusNotFound
+				return
+			}
+		}
+		// The client's token was invalid (e.g., expired), or
+		// the client didn't even provide one.  Propagate the
+		// 401 to encourage the client to use a [different]
+		// token.
+		//
+		// TODO(TC): This response would be confusing to
+		// someone trying (anonymously) to download public
+		// data that has been deleted.  Allow a referrer to
+		// provide this context somehow?
+		statusCode = http.StatusUnauthorized
+		w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+		return
+	}
+
+	filename := strings.Join(targetPath, "/")
+	rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+	if os.IsNotExist(err) {
+		statusCode = http.StatusNotFound
+		return
+	} else if err == arvadosclient.ErrNotImplemented {
+		statusCode = http.StatusNotImplemented
+		return
+	} else if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+		return
+	}
+	_, err = io.Copy(w, rdr)
+	if err != nil {
+		statusCode, statusText = http.StatusBadGateway, err.Error()
+	}
+}
diff --git a/services/keepdl/main.go b/services/keepdl/main.go
new file mode 100644
index 0000000..d780cc3
--- /dev/null
+++ b/services/keepdl/main.go
@@ -0,0 +1,28 @@
+package main
+
+import (
+	"flag"
+	"log"
+	"os"
+)
+
+func init() {
+	// MakeArvadosClient returns an error if this env var isn't
+	// available as a default token (even if we explicitly set a
+	// different token before doing anything with the client). We
+	// set this dummy value during init so it doesn't clobber the
+	// one used by "run test servers".
+	os.Setenv("ARVADOS_API_TOKEN", "xxx")
+}
+
+func main() {
+	flag.Parse()
+	srv := &server{}
+	if err := srv.Start(); err != nil {
+		log.Fatal(err)
+	}
+	log.Println("Listening at", srv.Addr)
+	if err := srv.Wait(); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/services/keepdl/server.go b/services/keepdl/server.go
new file mode 100644
index 0000000..44da00f
--- /dev/null
+++ b/services/keepdl/server.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"flag"
+	"net/http"
+
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var address string
+
+func init() {
+	flag.StringVar(&address, "address", "0.0.0.0:80",
+		"Address to listen on, \"host:port\".")
+}
+
+type server struct {
+	httpserver.Server
+}
+
+func (srv *server) Start() error {
+	mux := http.NewServeMux()
+	mux.Handle("/", &handler{})
+	srv.Handler = mux
+	srv.Addr = address
+	return srv.Server.Start()
+}
diff --git a/services/keepdl/server_test.go b/services/keepdl/server_test.go
new file mode 100644
index 0000000..1c36f98
--- /dev/null
+++ b/services/keepdl/server_test.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+	"crypto/md5"
+	"fmt"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+	"git.curoverse.com/arvados.git/sdk/go/arvadostest"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
+	check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+const (
+	spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+	activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+	fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
+	bogusCollection = "zzzzz-4zz18-totallynotexist"
+	hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
+)
+
+// IntegrationSuite tests need an API server and an arv-git-httpd server
+type IntegrationSuite struct {
+	testServer *server
+}
+
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+	for _, token := range []string{
+		"",
+		"bogustoken",
+	} {
+		hdr, body := s.runCurl(c, token, "/collections/"+fooCollection+"/foo")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 401 Unauthorized\r\n.*`)
+		c.Check(body, check.Equals, "")
+
+		if token != "" {
+			hdr, body = s.runCurl(c, token, "/collections/download/"+fooCollection+"/"+token+"/foo")
+			c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+			c.Check(body, check.Equals, "")
+		}
+
+		hdr, body = s.runCurl(c, token, "/bad-route")
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 404 Not Found\r\n.*`)
+		c.Check(body, check.Equals, "")
+	}
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+	for _, uri := range []string{
+		// Routing errors
+		"/",
+		"/foo",
+		"/download",
+		"/collections",
+		"/collections/",
+		"/collections/" + fooCollection,
+		"/collections/" + fooCollection + "/",
+		// Non-existent file in collection
+		"/collections/" + fooCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+		// Non-existent collection
+		"/collections/" + bogusCollection,
+		"/collections/" + bogusCollection + "/",
+		"/collections/" + bogusCollection + "/theperthcountyconspiracy",
+		"/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+	} {
+		hdr, body := s.runCurl(c, activeToken, uri)
+		c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+		c.Check(body, check.Equals, "")
+	}
+}
+
+func (s *IntegrationSuite) Test200(c *check.C) {
+	anonymousTokens = []string{anonymousToken}
+	arv, err := arvadosclient.MakeArvadosClient()
+	c.Assert(err, check.Equals, nil)
+	arv.ApiToken = activeToken
+	kc, err := keepclient.MakeKeepClient(&arv)
+	c.Assert(err, check.Equals, nil)
+	kc.PutB([]byte("Hello world\n"))
+	kc.PutB([]byte("foo"))
+	for _, spec := range [][]string{
+		// My collection
+		{activeToken, "/collections/" + fooCollection + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{"tokensobogus", "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{activeToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		{anonymousToken, "/collections/download/" + fooCollection + "/" + activeToken + "/foo", "acbd18db4cc2f85cedef654fccc4a4d8"},
+		// Anonymously accessible user agreement. These should
+		// start working when CollectionFileReader provides
+		// real data instead of fake/stub data.
+		{"", "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{activeToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/"+hwCollection+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+		{spectatorToken, "/collections/download/"+hwCollection+"/"+spectatorToken+"/Hello%20world.txt", "f0ef7081e1539ac00ef5b761b4fb01b3"},
+	} {
+		hdr, body := s.runCurl(c, spec[0], spec[1])
+		if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
+			c.Log("Not implemented!")
+			continue
+		}
+		c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+		c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec[2])
+	}
+}
+
+// Return header block and body.
+func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+	curlArgs := []string{"--silent", "--show-error", "--include"}
+	if token != "" {
+		curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+	}
+	curlArgs = append(curlArgs, args...)
+	curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+	c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+	output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+	// Without "-f", curl exits 0 as long as it gets a valid HTTP
+	// response from the server, even if the response status
+	// indicates that the request failed. In our test suite, we
+	// always expect a valid HTTP response, and we parse the
+	// headers ourselves. If curl exits non-zero, our testing
+	// environment is broken.
+	c.Assert(err, check.Equals, nil)
+	hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+	c.Assert(len(hdrsAndBody), check.Equals, 2)
+	hdr = hdrsAndBody[0]
+	body = hdrsAndBody[1]
+	return
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+	arvadostest.StartAPI()
+	arvadostest.StartKeep()
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+	arvadostest.StopKeep()
+	arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+	arvadostest.ResetEnv()
+	s.testServer = &server{}
+	var err error
+	address = "127.0.0.1:0"
+	err = s.testServer.Start()
+	c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+	var err error
+	if s.testServer != nil {
+		err = s.testServer.Close()
+	}
+	c.Check(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+	check.TestingT(t)
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list