[arvados] created: 2.1.0-2615-g353fb35aa
git repository hosting
git at public.arvados.org
Thu Jun 23 14:47:39 UTC 2022
at 353fb35aa5262afc98b7ddbcdab1d7563abeb441 (commit)
commit 353fb35aa5262afc98b7ddbcdab1d7563abeb441
Author: Tom Clegg <tom at curii.com>
Date: Thu Jun 23 10:11:40 2022 -0400
19166: Allow multiple clusters to use loopback driver on same host.
If they don't ignore foreign UUIDs, they kill one another's processes
because A's container is never in B's queue.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
index 1c8d62c20..ba49599ed 100644
--- a/lib/dispatchcloud/worker/worker.go
+++ b/lib/dispatchcloud/worker/worker.go
@@ -418,6 +418,12 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
// empty string following final newline
} else if s == "broken" {
reportsBroken = true
+ } else if !strings.HasPrefix(s, wkr.wp.cluster.ClusterID) {
+ // Ignore crunch-run processes that belong to
+ // a different cluster (this arises in
+ // multi-cluster test cases that use the
+ // loopback driver)
+ continue
} else if toks := strings.Split(s, " "); len(toks) == 1 {
running = append(running, s)
} else if toks[1] == "stale" {
commit 88f6f39fa43b7a1452955143a85f57b393196a9e
Author: Tom Clegg <tom at curii.com>
Date: Thu Jun 23 00:56:31 2022 -0400
19166: Explain HPC container shell in architecture docs.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/doc/_config.yml b/doc/_config.yml
index 7c5e6d986..2f3133618 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -161,6 +161,7 @@ navbar:
- Computation with Crunch:
- api/execution.html.textile.liquid
- architecture/dispatchcloud.html.textile.liquid
+ - architecture/hpc.html.textile.liquid
- architecture/singularity.html.textile.liquid
- Other:
- api/permission-model.html.textile.liquid
diff --git a/doc/architecture/hpc.html.textile.liquid b/doc/architecture/hpc.html.textile.liquid
new file mode 100644
index 000000000..03a464971
--- /dev/null
+++ b/doc/architecture/hpc.html.textile.liquid
@@ -0,0 +1,29 @@
+---
+layout: default
+navsection: architecture
+title: Dispatching containers to HPC
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados can be configured to run containers on an HPC cluster using Slurm or LSF, as an alternative to "dispatching to cloud VMs":dispatchcloud.html.
+
+In this configuration, the appropriate Arvados dispatcher service -- @crunch-dispatch-slurm@ or @arvados-dispatch-lsf@ -- picks up each container as it appears in the Arvados queue and submits a short shell script as a batch job to the HPC job queue. The shell script executes the @crunch-run@ container supervisor which retrieves the container specification from the Arvados controller, starts an arv-mount process, runs the container using @docker exec@ or @singularity exec@, and sends updates (logs, outputs, exit code, etc.) back to the Arvados controller.
+
+h2. Container communication channel (reverse https tunnel)
+
+The crunch-run program runs a gateway server to facilitate the “container shell” feature. However, depending on the site's network topology, the Arvados controller may not be able to connect directly to the compute node where a given crunch-run process is running.
+
+Instead, in the HPC configuration, crunch-run connects to the Arvados controller at startup and sets up a multiplexed tunnel, allowing the controller process to connect to crunch-run's gateway server without initiating a connection to the compute node, or even knowing the compute node's IP address.
+
+This means that when a client requests a container shell connection, the traffic goes through two or three servers:
+# The client connects to a controller host C1.
+# If the multiplexed tunnel is connected to a different controller host C2, then C1 proxies the incoming request to C2, using C2's InternalURL.
+# The controller host (C1 or C2) uses the multiplexed tunnel to connect to crunch-run's container gateway.
+
+h2. Scaling
+
+The @API.MaxConcurrentRequests@ configuration should not be set too low, or the long-lived tunnel connections can starve other clients.
commit b44ac131b7385af241acdcbf3835f743ea590b6a
Author: Tom Clegg <tom at curii.com>
Date: Wed Jun 22 09:56:18 2022 -0400
19166: Account for auto-retry in integration test.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/integration_test.go b/lib/controller/integration_test.go
index 67d60197e..65d34315a 100644
--- a/lib/controller/integration_test.go
+++ b/lib/controller/integration_test.go
@@ -1189,15 +1189,21 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec ma
deadline := time.Now().Add(time.Minute)
wait:
for ; ; lastState = ctr.State {
+ if time.Now().After(deadline) {
+ c.Errorf("timed out, container request state is %q", cr.State)
+ showlogs(ctr.Log)
+ c.FailNow()
+ }
err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
- c.Assert(err, check.IsNil)
+ if err != nil {
+ // container req is being auto-retried with a new container uuid
+ ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
+ c.Assert(err, check.IsNil)
+ time.Sleep(time.Second / 2)
+ continue
+ }
switch ctr.State {
case lastState:
- if time.Now().After(deadline) {
- c.Errorf("timed out, container request state is %q", cr.State)
- showlogs(ctr.Log)
- c.FailNow()
- }
time.Sleep(time.Second / 2)
case arvados.ContainerStateComplete:
break wait
commit d805410033037ba6d3dd3538dc802ff1ba9065b1
Author: Tom Clegg <tom at curii.com>
Date: Tue Jun 21 21:03:52 2022 -0400
19166: Proxy container shell request to other controller instance.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/federation/conn.go b/lib/controller/federation/conn.go
index 08d3ab1a6..ffb150bf2 100644
--- a/lib/controller/federation/conn.go
+++ b/lib/controller/federation/conn.go
@@ -375,7 +375,7 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption
return conn.chooseBackend(options.UUID).ContainerUnlock(ctx, options)
}
-func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (arvados.ContainerSSHConnection, error) {
+func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (arvados.ConnectionResponse, error) {
return conn.chooseBackend(options.UUID).ContainerSSH(ctx, options)
}
diff --git a/lib/controller/localdb/container_gateway.go b/lib/controller/localdb/container_gateway.go
index 798124654..90c95deb3 100644
--- a/lib/controller/localdb/container_gateway.go
+++ b/lib/controller/localdb/container_gateway.go
@@ -15,11 +15,15 @@ import (
"crypto/x509"
"errors"
"fmt"
+ "io"
+ "io/ioutil"
"net"
"net/http"
"net/url"
"strings"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
+ "git.arvados.org/arvados.git/lib/service"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
@@ -27,41 +31,42 @@ import (
"github.com/hashicorp/yamux"
)
+var (
+ forceProxyForTest = false
+ forceInternalURLForTest *arvados.URL
+)
+
// ContainerSSH returns a connection to the SSH server in the
// appropriate crunch-run process on the worker node where the
// specified container is running.
//
// If the returned error is nil, the caller is responsible for closing
// sshconn.Conn.
-func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOptions) (sshconn arvados.ConnectionResponse, err error) {
user, err := conn.railsProxy.UserGetCurrent(ctx, arvados.GetOptions{})
if err != nil {
- return
+ return sshconn, err
}
ctr, err := conn.railsProxy.ContainerGet(ctx, arvados.GetOptions{UUID: opts.UUID})
if err != nil {
- return
+ return sshconn, err
}
ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
if !user.IsAdmin || !conn.cluster.Containers.ShellAccess.Admin {
if !conn.cluster.Containers.ShellAccess.User {
- err = httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("shell access is disabled in config"), http.StatusServiceUnavailable)
}
- var crs arvados.ContainerRequestList
- crs, err = conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
+ crs, err := conn.railsProxy.ContainerRequestList(ctxRoot, arvados.ListOptions{Limit: -1, Filters: []arvados.Filter{{"container_uuid", "=", opts.UUID}}})
if err != nil {
- return
+ return sshconn, err
}
for _, cr := range crs.Items {
if cr.ModifiedByUserUUID != user.UUID {
- err = httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("permission denied: container is associated with requests submitted by other users"), http.StatusForbidden)
}
}
if crs.ItemsAvailable != len(crs.Items) {
- err = httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("incomplete response while checking permission"), http.StatusInternalServerError)
}
}
@@ -70,26 +75,77 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
conn.gwTunnelsLock.Unlock()
if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
- err = httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
- return
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
} else if ctr.State != arvados.ContainerStateRunning {
- err = httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
- return
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
}
+ // targetHost is the value we'll use in the Host header in our
+ // "Upgrade: ssh" http request. It's just a placeholder
+ // "localhost", unless we decide to connect directly, in which
+ // case we'll set it to the gateway's external ip:host. (The
+ // gateway doesn't even look at it, but we might as well.)
+ targetHost := "localhost"
+ myURL, _ := service.URLFromContext(ctx)
+
var rawconn net.Conn
- if ctr.GatewayAddress != "" && !strings.HasPrefix(ctr.GatewayAddress, "127.0.0.1:") {
+ if host, _, splitErr := net.SplitHostPort(ctr.GatewayAddress); splitErr == nil && host != "" && host != "127.0.0.1" {
+ // If crunch-run provided a GatewayAddress like
+ // "ipaddr:port", that means "ipaddr" is one of the
+ // external interfaces where the gateway is
+ // listening. In that case, it's the most
+ // reliable/direct option, so we use it even if a
+ // tunnel might also be available.
+ targetHost = ctr.GatewayAddress
rawconn, err = net.Dial("tcp", ctr.GatewayAddress)
- } else if tunnel != nil {
+ if err != nil {
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+ }
+ } else if tunnel != nil && !(forceProxyForTest && !opts.NoForward) {
+ // If we can't connect directly, and the gateway has
+ // established a yamux tunnel with us, connect through
+ // the tunnel.
+ //
+ // ...except: forceProxyForTest means we are emulating
+ // a situation where the gateway has established a
+ // yamux tunnel with controller B, and the
+ // ContainerSSH request arrives at controller A. If
+ // opts.NoForward==false then we are acting as A, so
+ // we pretend not to have a tunnel, and fall through
+ // to the "tunurl" case below. If opts.NoForward==true
+ // then the client is A and we are acting as B, so we
+ // connect to our tunnel.
rawconn, err = tunnel.Open()
+ if err != nil {
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+ }
} else if ctr.GatewayAddress == "" {
- err = errors.New("container is running but gateway is not available")
+ return sshconn, httpserver.ErrorWithStatus(errors.New("container is running but gateway is not available"), http.StatusServiceUnavailable)
+ } else if tunurl := strings.TrimPrefix(ctr.GatewayAddress, "tunnel "); tunurl != ctr.GatewayAddress &&
+ tunurl != "" &&
+ tunurl != myURL.String() &&
+ !opts.NoForward {
+ // If crunch-run provided a GatewayAddress like
+ // "tunnel https://10.0.0.10:1010/", that means the
+ // gateway has established a yamux tunnel with the
+ // controller process at the indicated InternalURL
+ // (which isn't us, otherwise we would have had
+ // "tunnel != nil" above). We need to proxy through to
+ // the other controller process in order to use the
+ // tunnel.
+ for u := range conn.cluster.Services.Controller.InternalURLs {
+ if u.String() == tunurl {
+ ctxlog.FromContext(ctx).Debugf("proxying ContainerSSH request to other controller at %s", u)
+ u := url.URL(u)
+ arpc := rpc.NewConn(conn.cluster.ClusterID, &u, conn.cluster.TLS.Insecure, rpc.PassthroughTokenProvider)
+ opts.NoForward = true
+ return arpc.ContainerSSH(ctx, opts)
+ }
+ }
+ ctxlog.FromContext(ctx).Warnf("container gateway provided a tunnel endpoint %s that is not one of Services.Controller.InternalURLs", tunurl)
+ return sshconn, httpserver.ErrorWithStatus(errors.New("container gateway is running but tunnel endpoint is invalid"), http.StatusServiceUnavailable)
} else {
- err = errors.New("container gateway is running but tunnel is down")
- }
- if err != nil {
- err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("container gateway is running but tunnel is down"), http.StatusServiceUnavailable)
}
// crunch-run uses a self-signed / unverifiable TLS
@@ -131,27 +187,25 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
})
err = tlsconn.HandshakeContext(ctx)
if err != nil {
- err = httpserver.ErrorWithStatus(err, http.StatusBadGateway)
- return
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusBadGateway)
}
if respondAuth == "" {
tlsconn.Close()
- err = httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
}
bufr := bufio.NewReader(tlsconn)
bufw := bufio.NewWriter(tlsconn)
u := url.URL{
Scheme: "http",
- Host: ctr.GatewayAddress,
+ Host: targetHost,
Path: "/ssh",
}
postform := url.Values{
"uuid": {opts.UUID},
"detach_keys": {opts.DetachKeys},
"login_username": {opts.LoginUsername},
- "no_forward": {"true"},
+ "no_forward": {fmt.Sprintf("%v", opts.NoForward)},
}
postdata := postform.Encode()
bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
@@ -163,22 +217,25 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
bufw.WriteString("\r\n")
bufw.WriteString(postdata)
bufw.Flush()
- resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
+ resp, err := http.ReadResponse(bufr, &http.Request{Method: "POST"})
if err != nil {
- err = httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
tlsconn.Close()
- return
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
}
- if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
- err = httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusSwitchingProtocols {
+ body, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 1000))
tlsconn.Close()
- return
+ return sshconn, httpserver.ErrorWithStatus(fmt.Errorf("unexpected status %s %q", resp.Status, body), http.StatusBadGateway)
}
if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
- err = httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
tlsconn.Close()
- return
+ return sshconn, httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
+ }
+ if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
+ tlsconn.Close()
+ return sshconn, httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
}
if !ctr.InteractiveSessionStarted {
@@ -190,15 +247,15 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
})
if err != nil {
tlsconn.Close()
- return
+ return sshconn, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
}
}
sshconn.Conn = tlsconn
sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
sshconn.Logger = ctxlog.FromContext(ctx)
- sshconn.UpgradeHeader = "ssh"
- return
+ sshconn.Header = http.Header{"Upgrade": {"ssh"}}
+ return sshconn, nil
}
// ContainerGatewayTunnel sets up a tunnel enabling us (controller) to
@@ -243,6 +300,11 @@ func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, opts arvados.Conta
resp.Conn = clientconn
resp.Bufrw = &bufio.ReadWriter{Reader: bufio.NewReader(&bytes.Buffer{}), Writer: bufio.NewWriter(&bytes.Buffer{})}
resp.Logger = ctxlog.FromContext(ctx)
- resp.UpgradeHeader = "tunnel"
+ resp.Header = http.Header{"Upgrade": {"tunnel"}}
+ if u, ok := service.URLFromContext(ctx); ok {
+ resp.Header.Set("X-Arvados-Internal-Url", u.String())
+ } else if forceInternalURLForTest != nil {
+ resp.Header.Set("X-Arvados-Internal-Url", forceInternalURLForTest.String())
+ }
return
}
diff --git a/lib/controller/localdb/container_gateway_test.go b/lib/controller/localdb/container_gateway_test.go
index b3b604e53..2c882c785 100644
--- a/lib/controller/localdb/container_gateway_test.go
+++ b/lib/controller/localdb/container_gateway_test.go
@@ -13,10 +13,13 @@ import (
"io/ioutil"
"net"
"net/http/httptest"
+ "net/url"
+ "strings"
"time"
"git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/controller/router"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
"git.arvados.org/arvados.git/lib/crunchrun"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
@@ -60,6 +63,11 @@ func (s *ContainerGatewaySuite) SetUpSuite(c *check.C) {
rtr := router.New(s.localdb, router.Config{})
srv := httptest.NewUnstartedServer(rtr)
srv.StartTLS()
+ // the test setup doesn't use lib/service so
+ // service.URLFromContext() returns nothing -- instead, this
+ // is how we advertise our internal URL and enable
+ // proxy-to-other-controller mode,
+ forceInternalURLForTest = &arvados.URL{Scheme: "https", Host: srv.Listener.Addr().String()}
ac := &arvados.Client{
APIHost: srv.Listener.Addr().String(),
AuthToken: arvadostest.Dispatch1Token,
@@ -278,13 +286,46 @@ func (s *ContainerGatewaySuite) TestCreateTunnel(c *check.C) {
c.Check(conn.Conn, check.NotNil)
}
-func (s *ContainerGatewaySuite) TestConnectThroughTunnel(c *check.C) {
+func (s *ContainerGatewaySuite) TestConnectThroughTunnelWithProxyOK(c *check.C) {
+ forceProxyForTest = true
+ defer func() { forceProxyForTest = false }()
+ s.cluster.Services.Controller.InternalURLs[*forceInternalURLForTest] = arvados.ServiceInstance{}
+ defer delete(s.cluster.Services.Controller.InternalURLs, *forceInternalURLForTest)
+ s.testConnectThroughTunnel(c, "")
+}
+
+func (s *ContainerGatewaySuite) TestConnectThroughTunnelWithProxyError(c *check.C) {
+ forceProxyForTest = true
+ defer func() { forceProxyForTest = false }()
+ // forceInternalURLForTest shouldn't be used because it isn't
+ // listed in s.cluster.Services.Controller.InternalURLs
+ s.testConnectThroughTunnel(c, `.*tunnel endpoint is invalid.*`)
+}
+
+func (s *ContainerGatewaySuite) TestConnectThroughTunnelNoProxyOK(c *check.C) {
+ s.testConnectThroughTunnel(c, "")
+}
+
+func (s *ContainerGatewaySuite) testConnectThroughTunnel(c *check.C, expectErrorMatch string) {
+ rootctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{s.cluster.SystemRootToken}})
+ // Until the tunnel starts up, set gateway_address to a value
+ // that can't work. We want to ensure the only way we can
+ // reach the gateway is through the tunnel.
+ gwaddr := "127.0.0.1:0"
tungw := &crunchrun.Gateway{
ContainerUUID: s.ctrUUID,
AuthSecret: s.gw.AuthSecret,
Log: ctxlog.TestLogger(c),
Target: crunchrun.GatewayTargetStub{},
ArvadosClient: s.gw.ArvadosClient,
+ UpdateTunnelURL: func(url string) {
+ c.Logf("UpdateTunnelURL(%q)", url)
+ gwaddr = "tunnel " + url
+ s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
+ UUID: s.ctrUUID,
+ Attrs: map[string]interface{}{
+ "gateway_address": gwaddr}})
+ },
}
c.Assert(tungw.Start(), check.IsNil)
@@ -294,26 +335,30 @@ func (s *ContainerGatewaySuite) TestConnectThroughTunnel(c *check.C) {
c.Assert(err, check.IsNil)
c.Check(host, check.Equals, "127.0.0.1")
- // Set the gateway_address field to 127.0.0.1:badport to
- // ensure the ContainerSSH() handler connects through the
- // tunnel, rather than the gateway server on 127.0.0.1 (which
- // wouldn't work IRL where controller and gateway are on
- // different hosts, but would allow the test to cheat).
- rootctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{s.cluster.SystemRootToken}})
_, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
UUID: s.ctrUUID,
Attrs: map[string]interface{}{
"state": arvados.ContainerStateRunning,
- "gateway_address": "127.0.0.1:0"}})
+ "gateway_address": gwaddr}})
c.Assert(err, check.IsNil)
- ctr, err := s.localdb.ContainerGet(s.ctx, arvados.GetOptions{UUID: s.ctrUUID})
- c.Check(err, check.IsNil)
- c.Check(ctr.InteractiveSessionStarted, check.Equals, false)
- c.Check(ctr.GatewayAddress, check.Equals, "127.0.0.1:0")
+ for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(time.Second / 2) {
+ ctr, err := s.localdb.ContainerGet(s.ctx, arvados.GetOptions{UUID: s.ctrUUID})
+ c.Assert(err, check.IsNil)
+ c.Check(ctr.InteractiveSessionStarted, check.Equals, false)
+ c.Logf("ctr.GatewayAddress == %s", ctr.GatewayAddress)
+ if strings.HasPrefix(ctr.GatewayAddress, "tunnel ") {
+ break
+ }
+ }
c.Log("connecting to gateway through tunnel")
- sshconn, err := s.localdb.ContainerSSH(s.ctx, arvados.ContainerSSHOptions{UUID: s.ctrUUID})
+ arpc := rpc.NewConn("", &url.URL{Scheme: "https", Host: s.gw.ArvadosClient.APIHost}, true, rpc.PassthroughTokenProvider)
+ sshconn, err := arpc.ContainerSSH(s.ctx, arvados.ContainerSSHOptions{UUID: s.ctrUUID})
+ if expectErrorMatch != "" {
+ c.Check(err, check.ErrorMatches, expectErrorMatch)
+ return
+ }
c.Assert(err, check.IsNil)
c.Assert(sshconn.Conn, check.NotNil)
defer sshconn.Conn.Close()
@@ -344,7 +389,7 @@ func (s *ContainerGatewaySuite) TestConnectThroughTunnel(c *check.C) {
case <-time.After(time.Second):
c.Fail()
}
- ctr, err = s.localdb.ContainerGet(s.ctx, arvados.GetOptions{UUID: s.ctrUUID})
+ ctr, err := s.localdb.ContainerGet(s.ctx, arvados.GetOptions{UUID: s.ctrUUID})
c.Check(err, check.IsNil)
c.Check(ctr.InteractiveSessionStarted, check.Equals, true)
}
diff --git a/lib/controller/router/request.go b/lib/controller/router/request.go
index 06141b103..31f2e1d7b 100644
--- a/lib/controller/router/request.go
+++ b/lib/controller/router/request.go
@@ -176,6 +176,7 @@ var boolParams = map[string]bool{
"bypass_federation": true,
"recursive": true,
"exclude_home_project": true,
+ "no_forward": true,
}
func stringToBool(s string) bool {
diff --git a/lib/controller/rpc/conn.go b/lib/controller/rpc/conn.go
index 8e25ca0d0..1475a5e01 100644
--- a/lib/controller/rpc/conn.go
+++ b/lib/controller/rpc/conn.go
@@ -23,6 +23,7 @@ import (
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/auth"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
)
@@ -331,21 +332,17 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption
// ContainerSSH returns a connection to the out-of-band SSH server for
// a running container. If the returned error is nil, the caller is
// responsible for closing sshconn.Conn.
-func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (sshconn arvados.ConnectionResponse, err error) {
u, err := conn.baseURL.Parse("/" + strings.Replace(arvados.EndpointContainerSSH.Path, "{uuid}", options.UUID, -1))
if err != nil {
err = fmt.Errorf("url.Parse: %w", err)
return
}
- u.RawQuery = url.Values{
+ return conn.socket(ctx, u, "ssh", url.Values{
"detach_keys": {options.DetachKeys},
"login_username": {options.LoginUsername},
- }.Encode()
- resp, err := conn.socket(ctx, u, "ssh", nil)
- if err != nil {
- return
- }
- return arvados.ContainerSSHConnection(resp), nil
+ "no_forward": {fmt.Sprintf("%v", options.NoForward)},
+ })
}
// ContainerGatewayTunnel returns a connection to a yamux session on
@@ -376,8 +373,7 @@ func (conn *Conn) socket(ctx context.Context, u *url.URL, upgradeHeader string,
}
netconn, err := tls.Dial("tcp", addr, &tls.Config{InsecureSkipVerify: insecure})
if err != nil {
- err = fmt.Errorf("tls.Dial: %w", err)
- return
+ return connresp, fmt.Errorf("tls.Dial: %w", err)
}
defer func() {
if err != nil {
@@ -389,10 +385,9 @@ func (conn *Conn) socket(ctx context.Context, u *url.URL, upgradeHeader string,
tokens, err := conn.tokenProvider(ctx)
if err != nil {
- return
+ return connresp, err
} else if len(tokens) < 1 {
- err = httpserver.ErrorWithStatus(errors.New("unauthorized"), http.StatusUnauthorized)
- return
+ return connresp, httpserver.ErrorWithStatus(errors.New("unauthorized"), http.StatusUnauthorized)
}
postdata := postform.Encode()
bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
@@ -402,18 +397,16 @@ func (conn *Conn) socket(ctx context.Context, u *url.URL, upgradeHeader string,
bufw.WriteString("Content-Type: application/x-www-form-urlencoded\r\n")
fmt.Fprintf(bufw, "Content-Length: %d\r\n", len(postdata))
bufw.WriteString("\r\n")
- if len(postdata) > 0 {
- bufw.WriteString(postdata)
- }
+ bufw.WriteString(postdata)
bufw.Flush()
- resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
+ resp, err := http.ReadResponse(bufr, &http.Request{Method: "POST"})
if err != nil {
- err = fmt.Errorf("http.ReadResponse: %w", err)
- return
+ return connresp, fmt.Errorf("http.ReadResponse: %w", err)
}
+ defer resp.Body.Close()
if resp.StatusCode != http.StatusSwitchingProtocols {
- defer resp.Body.Close()
- body, _ := ioutil.ReadAll(resp.Body)
+ ctxlog.FromContext(ctx).Infof("rpc.Conn.socket: server %s did not switch protocols, got status %s", u.String(), resp.Status)
+ body, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 10000))
var message string
var errDoc httpserver.ErrorResponse
if err := json.Unmarshal(body, &errDoc); err == nil {
@@ -421,17 +414,16 @@ func (conn *Conn) socket(ctx context.Context, u *url.URL, upgradeHeader string,
} else {
message = fmt.Sprintf("%q", body)
}
- err = fmt.Errorf("server did not provide a tunnel: %s (HTTP %d)", message, resp.StatusCode)
- return
+ return connresp, fmt.Errorf("server did not provide a tunnel: %s %s", resp.Status, message)
}
if strings.ToLower(resp.Header.Get("Upgrade")) != upgradeHeader ||
strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
- err = fmt.Errorf("bad response from server: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection"))
- return
+ return connresp, fmt.Errorf("bad response from server: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection"))
}
connresp.Conn = netconn
connresp.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
- return
+ connresp.Header = resp.Header
+ return connresp, nil
}
func (conn *Conn) ContainerRequestCreate(ctx context.Context, options arvados.CreateOptions) (arvados.ContainerRequest, error) {
diff --git a/lib/crunchrun/container_gateway.go b/lib/crunchrun/container_gateway.go
index ba52f8ab4..02df06cf2 100644
--- a/lib/crunchrun/container_gateway.go
+++ b/lib/crunchrun/container_gateway.go
@@ -73,6 +73,11 @@ type Gateway struct {
// address is unknown or not routable from controller.
ArvadosClient *arvados.Client
+ // When a tunnel is connected or reconnected, this func (if
+ // not nil) will be called with the InternalURL of the
+ // controller process at the other end of the tunnel.
+ UpdateTunnelURL func(url string)
+
sshConfig ssh.ServerConfig
requestAuth string
respondAuth string
@@ -209,6 +214,9 @@ func (gw *Gateway) runTunnel(addr string) error {
if err != nil {
return fmt.Errorf("error setting up mux client end: %s", err)
}
+ if url := tun.Header.Get("X-Arvados-Internal-Url"); url != "" && gw.UpdateTunnelURL != nil {
+ gw.UpdateTunnelURL(url)
+ }
for {
muxconn, err := mux.Accept()
if err != nil {
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index 693df171e..c4b9cfcb1 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -1912,6 +1912,20 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
Target: cr.executor,
Log: cr.CrunchLog,
ArvadosClient: cr.dispatcherClient,
+ UpdateTunnelURL: func(url string) {
+ if gwListen != "" {
+ // prefer connecting directly
+ return
+ }
+ // direct connection won't work, so we
+ // use the gateway_address field to
+ // indicate the internalURL of the
+ // controller process that has the
+ // current tunnel connection.
+ cr.gateway.Address = "tunnel " + url
+ cr.DispatcherArvClient.Update("containers", containerUUID,
+ arvadosclient.Dict{"container": arvadosclient.Dict{"gateway_address": cr.gateway.Address}}, nil)
+ },
}
err = cr.gateway.Start()
if err != nil {
diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index 8a41cb851..3797a17f5 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -10,6 +10,7 @@ import (
"encoding/json"
"io"
"net"
+ "net/http"
"github.com/sirupsen/logrus"
)
@@ -100,13 +101,11 @@ type ContainerSSHOptions struct {
NoForward bool `json:"no_forward"`
}
-type ContainerSSHConnection ConnectionResponse
-
type ConnectionResponse struct {
- Conn net.Conn `json:"-"`
- Bufrw *bufio.ReadWriter `json:"-"`
- Logger logrus.FieldLogger `json:"-"`
- UpgradeHeader string `json:"-"`
+ Conn net.Conn `json:"-"`
+ Bufrw *bufio.ReadWriter `json:"-"`
+ Logger logrus.FieldLogger `json:"-"`
+ Header http.Header `json:"-"`
}
type ContainerGatewayTunnelOptions struct {
@@ -264,7 +263,7 @@ type API interface {
ContainerDelete(ctx context.Context, options DeleteOptions) (Container, error)
ContainerLock(ctx context.Context, options GetOptions) (Container, error)
ContainerUnlock(ctx context.Context, options GetOptions) (Container, error)
- ContainerSSH(ctx context.Context, options ContainerSSHOptions) (ContainerSSHConnection, error)
+ ContainerSSH(ctx context.Context, options ContainerSSHOptions) (ConnectionResponse, error)
ContainerGatewayTunnel(ctx context.Context, options ContainerGatewayTunnelOptions) (ConnectionResponse, error)
ContainerRequestCreate(ctx context.Context, options CreateOptions) (ContainerRequest, error)
ContainerRequestUpdate(ctx context.Context, options UpdateOptions) (ContainerRequest, error)
diff --git a/sdk/go/arvados/container_gateway.go b/sdk/go/arvados/container_gateway.go
index d1d512856..ce33fb310 100644
--- a/sdk/go/arvados/container_gateway.go
+++ b/sdk/go/arvados/container_gateway.go
@@ -21,7 +21,9 @@ func (cresp ConnectionResponse) ServeHTTP(w http.ResponseWriter, req *http.Reque
return
}
w.Header().Set("Connection", "upgrade")
- w.Header().Set("Upgrade", cresp.UpgradeHeader)
+ for k, v := range cresp.Header {
+ w.Header()[k] = v
+ }
w.WriteHeader(http.StatusSwitchingProtocols)
conn, bufrw, err := hj.Hijack()
if err != nil {
@@ -32,7 +34,7 @@ func (cresp ConnectionResponse) ServeHTTP(w http.ResponseWriter, req *http.Reque
var bytesIn, bytesOut int64
var wg sync.WaitGroup
- ctx, cancel := context.WithCancel(context.Background())
+ ctx, cancel := context.WithCancel(req.Context())
wg.Add(1)
go func() {
defer wg.Done()
@@ -44,7 +46,7 @@ func (cresp ConnectionResponse) ServeHTTP(w http.ResponseWriter, req *http.Reque
bytesOut += n
}
if err != nil {
- ctxlog.FromContext(req.Context()).WithError(err).Error("error copying downstream")
+ ctxlog.FromContext(ctx).WithError(err).Error("error copying downstream")
}
}()
wg.Add(1)
@@ -58,17 +60,14 @@ func (cresp ConnectionResponse) ServeHTTP(w http.ResponseWriter, req *http.Reque
bytesIn += n
}
if err != nil {
- ctxlog.FromContext(req.Context()).WithError(err).Error("error copying upstream")
+ ctxlog.FromContext(ctx).WithError(err).Error("error copying upstream")
}
}()
- <-ctx.Done()
+ wg.Wait()
if cresp.Logger != nil {
- go func() {
- wg.Wait()
- cresp.Logger.WithFields(logrus.Fields{
- "bytesIn": bytesIn,
- "bytesOut": bytesOut,
- }).Info("closed connection")
- }()
+ cresp.Logger.WithFields(logrus.Fields{
+ "bytesIn": bytesIn,
+ "bytesOut": bytesOut,
+ }).Info("closed connection")
}
}
diff --git a/sdk/go/arvadostest/api.go b/sdk/go/arvadostest/api.go
index d784abf67..d6da579d6 100644
--- a/sdk/go/arvadostest/api.go
+++ b/sdk/go/arvadostest/api.go
@@ -109,9 +109,9 @@ func (as *APIStub) ContainerUnlock(ctx context.Context, options arvados.GetOptio
as.appendCall(ctx, as.ContainerUnlock, options)
return arvados.Container{}, as.Error
}
-func (as *APIStub) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (arvados.ContainerSSHConnection, error) {
+func (as *APIStub) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (arvados.ConnectionResponse, error) {
as.appendCall(ctx, as.ContainerSSH, options)
- return arvados.ContainerSSHConnection{}, as.Error
+ return arvados.ConnectionResponse{}, as.Error
}
func (as *APIStub) ContainerGatewayTunnel(ctx context.Context, options arvados.ContainerGatewayTunnelOptions) (arvados.ConnectionResponse, error) {
as.appendCall(ctx, as.ContainerGatewayTunnel, options)
commit 38c851cc040e05e3c6cf9a369a8b156602ace895
Author: Tom Clegg <tom at curii.com>
Date: Sun Jun 19 21:30:06 2022 -0400
19166: Move args from headers to post form, add no_forward flag.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/localdb/container_gateway.go b/lib/controller/localdb/container_gateway.go
index fcfa599e4..798124654 100644
--- a/lib/controller/localdb/container_gateway.go
+++ b/lib/controller/localdb/container_gateway.go
@@ -147,14 +147,21 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
Host: ctr.GatewayAddress,
Path: "/ssh",
}
+ postform := url.Values{
+ "uuid": {opts.UUID},
+ "detach_keys": {opts.DetachKeys},
+ "login_username": {opts.LoginUsername},
+ "no_forward": {"true"},
+ }
+ postdata := postform.Encode()
bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
bufw.WriteString("Host: " + u.Host + "\r\n")
bufw.WriteString("Upgrade: ssh\r\n")
- bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
bufw.WriteString("X-Arvados-Authorization: " + requestAuth + "\r\n")
- bufw.WriteString("X-Arvados-Detach-Keys: " + opts.DetachKeys + "\r\n")
- bufw.WriteString("X-Arvados-Login-Username: " + opts.LoginUsername + "\r\n")
+ bufw.WriteString("Content-Type: application/x-www-form-urlencoded\r\n")
+ fmt.Fprintf(bufw, "Content-Length: %d\r\n", len(postdata))
bufw.WriteString("\r\n")
+ bufw.WriteString(postdata)
bufw.Flush()
resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
if err != nil {
diff --git a/lib/crunchrun/container_gateway.go b/lib/crunchrun/container_gateway.go
index 49eb68c0f..ba52f8ab4 100644
--- a/lib/crunchrun/container_gateway.go
+++ b/lib/crunchrun/container_gateway.go
@@ -267,7 +267,8 @@ func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
http.Error(w, "path not found", http.StatusNotFound)
return
}
- if want := req.Header.Get("X-Arvados-Target-Uuid"); want != gw.ContainerUUID {
+ req.ParseForm()
+ if want := req.Form.Get("uuid"); want != gw.ContainerUUID {
http.Error(w, fmt.Sprintf("misdirected request: meant for %q but received by crunch-run %q", want, gw.ContainerUUID), http.StatusBadGateway)
return
}
@@ -275,8 +276,8 @@ func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
http.Error(w, "bad X-Arvados-Authorization header", http.StatusUnauthorized)
return
}
- detachKeys := req.Header.Get("X-Arvados-Detach-Keys")
- username := req.Header.Get("X-Arvados-Login-Username")
+ detachKeys := req.Form.Get("detach_keys")
+ username := req.Form.Get("login_username")
if username == "" {
username = "root"
}
diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index 1b303ffb4..8a41cb851 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -97,6 +97,7 @@ type ContainerSSHOptions struct {
UUID string `json:"uuid"`
DetachKeys string `json:"detach_keys"`
LoginUsername string `json:"login_username"`
+ NoForward bool `json:"no_forward"`
}
type ContainerSSHConnection ConnectionResponse
commit 6c82979bb16b0c96653c36cbc1660c7f049f763a
Author: Tom Clegg <tom at curii.com>
Date: Sun Jun 19 01:24:17 2022 -0400
Handle multiple incoming X-Forwarded-For headers properly.
(in case Nginx is not there to merge them for us)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/proxy.go b/lib/controller/proxy.go
index 13dfcac16..47b8cb471 100644
--- a/lib/controller/proxy.go
+++ b/lib/controller/proxy.go
@@ -63,10 +63,13 @@ func (p *proxy) Do(
hdrOut[k] = v
}
}
- xff := reqIn.RemoteAddr
- if xffIn := reqIn.Header.Get("X-Forwarded-For"); xffIn != "" {
- xff = xffIn + "," + xff
+ xff := ""
+ for _, xffIn := range reqIn.Header["X-Forwarded-For"] {
+ if xffIn != "" {
+ xff += xffIn + ","
+ }
}
+ xff += reqIn.RemoteAddr
hdrOut.Set("X-Forwarded-For", xff)
if hdrOut.Get("X-Forwarded-Proto") == "" {
hdrOut.Set("X-Forwarded-Proto", reqIn.URL.Scheme)
commit 6d64c06dfeaac8d29f2b7c19e1f120bfb0ddf7db
Author: Tom Clegg <tom at curii.com>
Date: Sat Jun 18 00:34:03 2022 -0400
19166: Set up tunnel for container gateway requests
in slurm/lsf environments, where controller doesn't know how to (and
perhaps can't) connect directly to the compute node where crunch-run
is running.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/go.mod b/go.mod
index 525bae11e..aced60dbc 100644
--- a/go.mod
+++ b/go.mod
@@ -72,6 +72,7 @@ require (
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/protobuf v1.5.0 // indirect
github.com/googleapis/gax-go/v2 v2.0.5 // indirect
+ github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af // indirect
github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect
diff --git a/go.sum b/go.sum
index 82a8d83d7..422a891e0 100644
--- a/go.sum
+++ b/go.sum
@@ -433,6 +433,8 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ
github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
+github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87 h1:xixZ2bWeofWV68J+x6AzmKuVM/JWCQwkWm6GW/MUR6I=
+github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
diff --git a/lib/controller/federation/conn.go b/lib/controller/federation/conn.go
index d9f587852..08d3ab1a6 100644
--- a/lib/controller/federation/conn.go
+++ b/lib/controller/federation/conn.go
@@ -379,6 +379,10 @@ func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSH
return conn.chooseBackend(options.UUID).ContainerSSH(ctx, options)
}
+func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, options arvados.ContainerGatewayTunnelOptions) (arvados.ConnectionResponse, error) {
+ return conn.chooseBackend(options.UUID).ContainerGatewayTunnel(ctx, options)
+}
+
func (conn *Conn) ContainerRequestList(ctx context.Context, options arvados.ListOptions) (arvados.ContainerRequestList, error) {
return conn.generated_ContainerRequestList(ctx, options)
}
diff --git a/lib/controller/localdb/conn.go b/lib/controller/localdb/conn.go
index 104cfe28f..a36822ad6 100644
--- a/lib/controller/localdb/conn.go
+++ b/lib/controller/localdb/conn.go
@@ -11,6 +11,7 @@ import (
"net/http"
"os"
"strings"
+ "sync"
"time"
"git.arvados.org/arvados.git/lib/controller/railsproxy"
@@ -18,6 +19,7 @@ import (
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
+ "github.com/hashicorp/yamux"
"github.com/sirupsen/logrus"
)
@@ -31,6 +33,8 @@ type Conn struct {
lastVocabularyRefreshCheck time.Time
lastVocabularyError error
loginController
+ gwTunnels map[string]*yamux.Session
+ gwTunnelsLock sync.Mutex
}
func NewConn(cluster *arvados.Cluster) *Conn {
diff --git a/lib/controller/localdb/container_gateway.go b/lib/controller/localdb/container_gateway.go
index 3b40eccaf..fcfa599e4 100644
--- a/lib/controller/localdb/container_gateway.go
+++ b/lib/controller/localdb/container_gateway.go
@@ -6,13 +6,16 @@ package localdb
import (
"bufio"
+ "bytes"
"context"
"crypto/hmac"
"crypto/sha256"
+ "crypto/subtle"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
+ "net"
"net/http"
"net/url"
"strings"
@@ -21,6 +24,7 @@ import (
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
+ "github.com/hashicorp/yamux"
)
// ContainerSSH returns a connection to the SSH server in the
@@ -61,19 +65,33 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
}
}
- switch ctr.State {
- case arvados.ContainerStateQueued, arvados.ContainerStateLocked:
+ conn.gwTunnelsLock.Lock()
+ tunnel := conn.gwTunnels[opts.UUID]
+ conn.gwTunnelsLock.Unlock()
+
+ if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
err = httpserver.ErrorWithStatus(fmt.Errorf("container is not running yet (state is %q)", ctr.State), http.StatusServiceUnavailable)
return
- case arvados.ContainerStateRunning:
- if ctr.GatewayAddress == "" {
- err = httpserver.ErrorWithStatus(errors.New("container is running but gateway is not available -- installation problem or feature not supported"), http.StatusServiceUnavailable)
- return
- }
- default:
+ } else if ctr.State != arvados.ContainerStateRunning {
err = httpserver.ErrorWithStatus(fmt.Errorf("container has ended (state is %q)", ctr.State), http.StatusGone)
return
}
+
+ var rawconn net.Conn
+ if ctr.GatewayAddress != "" && !strings.HasPrefix(ctr.GatewayAddress, "127.0.0.1:") {
+ rawconn, err = net.Dial("tcp", ctr.GatewayAddress)
+ } else if tunnel != nil {
+ rawconn, err = tunnel.Open()
+ } else if ctr.GatewayAddress == "" {
+ err = errors.New("container is running but gateway is not available")
+ } else {
+ err = errors.New("container gateway is running but tunnel is down")
+ }
+ if err != nil {
+ err = httpserver.ErrorWithStatus(err, http.StatusServiceUnavailable)
+ return
+ }
+
// crunch-run uses a self-signed / unverifiable TLS
// certificate, so we use the following scheme to ensure we're
// not talking to a MITM.
@@ -93,7 +111,7 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
// X-Arvados-Authorization-Response header, proving that the
// server knows ctrKey.
var requestAuth, respondAuth string
- netconn, err := tls.Dial("tcp", ctr.GatewayAddress, &tls.Config{
+ tlsconn := tls.Client(rawconn, &tls.Config{
InsecureSkipVerify: true,
VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
if len(rawCerts) == 0 {
@@ -111,23 +129,25 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
return nil
},
})
+ err = tlsconn.HandshakeContext(ctx)
if err != nil {
err = httpserver.ErrorWithStatus(err, http.StatusBadGateway)
return
}
if respondAuth == "" {
+ tlsconn.Close()
err = httpserver.ErrorWithStatus(errors.New("BUG: no respondAuth"), http.StatusInternalServerError)
return
}
- bufr := bufio.NewReader(netconn)
- bufw := bufio.NewWriter(netconn)
+ bufr := bufio.NewReader(tlsconn)
+ bufw := bufio.NewWriter(tlsconn)
u := url.URL{
Scheme: "http",
Host: ctr.GatewayAddress,
Path: "/ssh",
}
- bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n")
+ bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
bufw.WriteString("Host: " + u.Host + "\r\n")
bufw.WriteString("Upgrade: ssh\r\n")
bufw.WriteString("X-Arvados-Target-Uuid: " + opts.UUID + "\r\n")
@@ -139,18 +159,18 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
if err != nil {
err = httpserver.ErrorWithStatus(fmt.Errorf("error reading http response from gateway: %w", err), http.StatusBadGateway)
- netconn.Close()
+ tlsconn.Close()
return
}
if resp.Header.Get("X-Arvados-Authorization-Response") != respondAuth {
err = httpserver.ErrorWithStatus(errors.New("bad X-Arvados-Authorization-Response header"), http.StatusBadGateway)
- netconn.Close()
+ tlsconn.Close()
return
}
if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
err = httpserver.ErrorWithStatus(errors.New("bad upgrade"), http.StatusBadGateway)
- netconn.Close()
+ tlsconn.Close()
return
}
@@ -162,13 +182,60 @@ func (conn *Conn) ContainerSSH(ctx context.Context, opts arvados.ContainerSSHOpt
},
})
if err != nil {
- netconn.Close()
+ tlsconn.Close()
return
}
}
- sshconn.Conn = netconn
+ sshconn.Conn = tlsconn
sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
sshconn.Logger = ctxlog.FromContext(ctx)
+ sshconn.UpgradeHeader = "ssh"
+ return
+}
+
+// ContainerGatewayTunnel sets up a tunnel enabling us (controller) to
+// connect to the caller's (crunch-run's) gateway server.
+func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, opts arvados.ContainerGatewayTunnelOptions) (resp arvados.ConnectionResponse, err error) {
+ h := hmac.New(sha256.New, []byte(conn.cluster.SystemRootToken))
+ fmt.Fprint(h, opts.UUID)
+ authSecret := fmt.Sprintf("%x", h.Sum(nil))
+ if subtle.ConstantTimeCompare([]byte(authSecret), []byte(opts.AuthSecret)) != 1 {
+ ctxlog.FromContext(ctx).Info("received incorrect auth_secret")
+ return resp, httpserver.ErrorWithStatus(errors.New("authentication error"), http.StatusUnauthorized)
+ }
+
+ muxconn, clientconn := net.Pipe()
+ tunnel, err := yamux.Server(muxconn, nil)
+ if err != nil {
+ clientconn.Close()
+ return resp, httpserver.ErrorWithStatus(err, http.StatusInternalServerError)
+ }
+
+ conn.gwTunnelsLock.Lock()
+ if conn.gwTunnels == nil {
+ conn.gwTunnels = map[string]*yamux.Session{opts.UUID: tunnel}
+ } else {
+ conn.gwTunnels[opts.UUID] = tunnel
+ }
+ conn.gwTunnelsLock.Unlock()
+
+ go func() {
+ <-tunnel.CloseChan()
+ conn.gwTunnelsLock.Lock()
+ if conn.gwTunnels[opts.UUID] == tunnel {
+ delete(conn.gwTunnels, opts.UUID)
+ }
+ conn.gwTunnelsLock.Unlock()
+ }()
+
+ // Assuming we're acting as the backend of an http server,
+ // lib/controller/router will call resp's ServeHTTP handler,
+ // which upgrades the incoming http connection to a raw socket
+ // and connects it to our yamux.Server through our net.Pipe().
+ resp.Conn = clientconn
+ resp.Bufrw = &bufio.ReadWriter{Reader: bufio.NewReader(&bytes.Buffer{}), Writer: bufio.NewWriter(&bytes.Buffer{})}
+ resp.Logger = ctxlog.FromContext(ctx)
+ resp.UpgradeHeader = "tunnel"
return
}
diff --git a/lib/controller/localdb/container_gateway_test.go b/lib/controller/localdb/container_gateway_test.go
index 271760420..b3b604e53 100644
--- a/lib/controller/localdb/container_gateway_test.go
+++ b/lib/controller/localdb/container_gateway_test.go
@@ -12,9 +12,11 @@ import (
"io"
"io/ioutil"
"net"
+ "net/http/httptest"
"time"
"git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/lib/controller/router"
"git.arvados.org/arvados.git/lib/crunchrun"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
@@ -55,12 +57,21 @@ func (s *ContainerGatewaySuite) SetUpSuite(c *check.C) {
fmt.Fprint(h, s.ctrUUID)
authKey := fmt.Sprintf("%x", h.Sum(nil))
+ rtr := router.New(s.localdb, router.Config{})
+ srv := httptest.NewUnstartedServer(rtr)
+ srv.StartTLS()
+ ac := &arvados.Client{
+ APIHost: srv.Listener.Addr().String(),
+ AuthToken: arvadostest.Dispatch1Token,
+ Insecure: true,
+ }
s.gw = &crunchrun.Gateway{
ContainerUUID: s.ctrUUID,
AuthSecret: authKey,
Address: "localhost:0",
Log: ctxlog.TestLogger(c),
Target: crunchrun.GatewayTargetStub{},
+ ArvadosClient: ac,
}
c.Assert(s.gw.Start(), check.IsNil)
rootctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{s.cluster.SystemRootToken}})
@@ -69,18 +80,25 @@ func (s *ContainerGatewaySuite) SetUpSuite(c *check.C) {
Attrs: map[string]interface{}{
"state": arvados.ContainerStateLocked}})
c.Assert(err, check.IsNil)
- _, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
+}
+
+func (s *ContainerGatewaySuite) SetUpTest(c *check.C) {
+ // clear any tunnel sessions started by previous test cases
+ s.localdb.gwTunnelsLock.Lock()
+ s.localdb.gwTunnels = nil
+ s.localdb.gwTunnelsLock.Unlock()
+
+ rootctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{s.cluster.SystemRootToken}})
+ _, err := s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
UUID: s.ctrUUID,
Attrs: map[string]interface{}{
"state": arvados.ContainerStateRunning,
"gateway_address": s.gw.Address}})
c.Assert(err, check.IsNil)
-}
-func (s *ContainerGatewaySuite) SetUpTest(c *check.C) {
s.cluster.Containers.ShellAccess.Admin = true
s.cluster.Containers.ShellAccess.User = true
- _, err := arvadostest.DB(c, s.cluster).Exec(`update containers set interactive_session_started=$1 where uuid=$2`, false, s.ctrUUID)
+ _, err = arvadostest.DB(c, s.cluster).Exec(`update containers set interactive_session_started=$1 where uuid=$2`, false, s.ctrUUID)
c.Check(err, check.IsNil)
}
@@ -234,3 +252,99 @@ func (s *ContainerGatewaySuite) TestConnectFail(c *check.C) {
_, err = s.localdb.ContainerSSH(ctx, arvados.ContainerSSHOptions{UUID: s.ctrUUID})
c.Check(err, check.ErrorMatches, `.* 404 .*`)
}
+
+func (s *ContainerGatewaySuite) TestCreateTunnel(c *check.C) {
+ // no AuthSecret
+ conn, err := s.localdb.ContainerGatewayTunnel(s.ctx, arvados.ContainerGatewayTunnelOptions{
+ UUID: s.ctrUUID,
+ })
+ c.Check(err, check.ErrorMatches, `authentication error`)
+ c.Check(conn.Conn, check.IsNil)
+
+ // bogus AuthSecret
+ conn, err = s.localdb.ContainerGatewayTunnel(s.ctx, arvados.ContainerGatewayTunnelOptions{
+ UUID: s.ctrUUID,
+ AuthSecret: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
+ })
+ c.Check(err, check.ErrorMatches, `authentication error`)
+ c.Check(conn.Conn, check.IsNil)
+
+ // good AuthSecret
+ conn, err = s.localdb.ContainerGatewayTunnel(s.ctx, arvados.ContainerGatewayTunnelOptions{
+ UUID: s.ctrUUID,
+ AuthSecret: s.gw.AuthSecret,
+ })
+ c.Check(err, check.IsNil)
+ c.Check(conn.Conn, check.NotNil)
+}
+
+func (s *ContainerGatewaySuite) TestConnectThroughTunnel(c *check.C) {
+ tungw := &crunchrun.Gateway{
+ ContainerUUID: s.ctrUUID,
+ AuthSecret: s.gw.AuthSecret,
+ Log: ctxlog.TestLogger(c),
+ Target: crunchrun.GatewayTargetStub{},
+ ArvadosClient: s.gw.ArvadosClient,
+ }
+ c.Assert(tungw.Start(), check.IsNil)
+
+ // We didn't supply an external hostname in the Address field,
+ // so Start() should assign a local address.
+ host, _, err := net.SplitHostPort(tungw.Address)
+ c.Assert(err, check.IsNil)
+ c.Check(host, check.Equals, "127.0.0.1")
+
+ // Set the gateway_address field to 127.0.0.1:badport to
+ // ensure the ContainerSSH() handler connects through the
+ // tunnel, rather than the gateway server on 127.0.0.1 (which
+ // wouldn't work IRL where controller and gateway are on
+ // different hosts, but would allow the test to cheat).
+ rootctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{s.cluster.SystemRootToken}})
+ _, err = s.localdb.ContainerUpdate(rootctx, arvados.UpdateOptions{
+ UUID: s.ctrUUID,
+ Attrs: map[string]interface{}{
+ "state": arvados.ContainerStateRunning,
+ "gateway_address": "127.0.0.1:0"}})
+ c.Assert(err, check.IsNil)
+
+ ctr, err := s.localdb.ContainerGet(s.ctx, arvados.GetOptions{UUID: s.ctrUUID})
+ c.Check(err, check.IsNil)
+ c.Check(ctr.InteractiveSessionStarted, check.Equals, false)
+ c.Check(ctr.GatewayAddress, check.Equals, "127.0.0.1:0")
+
+ c.Log("connecting to gateway through tunnel")
+ sshconn, err := s.localdb.ContainerSSH(s.ctx, arvados.ContainerSSHOptions{UUID: s.ctrUUID})
+ c.Assert(err, check.IsNil)
+ c.Assert(sshconn.Conn, check.NotNil)
+ defer sshconn.Conn.Close()
+
+ done := make(chan struct{})
+ go func() {
+ defer close(done)
+
+ // Receive text banner
+ buf := make([]byte, 12)
+ _, err := io.ReadFull(sshconn.Conn, buf)
+ c.Check(err, check.IsNil)
+ c.Check(string(buf), check.Equals, "SSH-2.0-Go\r\n")
+
+ // Send text banner
+ _, err = sshconn.Conn.Write([]byte("SSH-2.0-Fake\r\n"))
+ c.Check(err, check.IsNil)
+
+ // Receive binary
+ _, err = io.ReadFull(sshconn.Conn, buf[:4])
+ c.Check(err, check.IsNil)
+
+ // If we can get this far into an SSH handshake...
+ c.Logf("was able to read %x -- success, tunnel is working", buf[:4])
+ }()
+ select {
+ case <-done:
+ case <-time.After(time.Second):
+ c.Fail()
+ }
+ ctr, err = s.localdb.ContainerGet(s.ctx, arvados.GetOptions{UUID: s.ctrUUID})
+ c.Check(err, check.IsNil)
+ c.Check(ctr.InteractiveSessionStarted, check.Equals, true)
+}
diff --git a/lib/controller/router/router.go b/lib/controller/router/router.go
index 586ea8e67..a87dbca92 100644
--- a/lib/controller/router/router.go
+++ b/lib/controller/router/router.go
@@ -244,6 +244,13 @@ func (rtr *router) addRoutes() {
return rtr.backend.ContainerSSH(ctx, *opts.(*arvados.ContainerSSHOptions))
},
},
+ {
+ arvados.EndpointContainerGatewayTunnel,
+ func() interface{} { return &arvados.ContainerGatewayTunnelOptions{} },
+ func(ctx context.Context, opts interface{}) (interface{}, error) {
+ return rtr.backend.ContainerGatewayTunnel(ctx, *opts.(*arvados.ContainerGatewayTunnelOptions))
+ },
+ },
{
arvados.EndpointGroupCreate,
func() interface{} { return &arvados.CreateOptions{} },
diff --git a/lib/controller/rpc/conn.go b/lib/controller/rpc/conn.go
index 1148068d7..8e25ca0d0 100644
--- a/lib/controller/rpc/conn.go
+++ b/lib/controller/rpc/conn.go
@@ -332,6 +332,39 @@ func (conn *Conn) ContainerUnlock(ctx context.Context, options arvados.GetOption
// a running container. If the returned error is nil, the caller is
// responsible for closing sshconn.Conn.
func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSHOptions) (sshconn arvados.ContainerSSHConnection, err error) {
+ u, err := conn.baseURL.Parse("/" + strings.Replace(arvados.EndpointContainerSSH.Path, "{uuid}", options.UUID, -1))
+ if err != nil {
+ err = fmt.Errorf("url.Parse: %w", err)
+ return
+ }
+ u.RawQuery = url.Values{
+ "detach_keys": {options.DetachKeys},
+ "login_username": {options.LoginUsername},
+ }.Encode()
+ resp, err := conn.socket(ctx, u, "ssh", nil)
+ if err != nil {
+ return
+ }
+ return arvados.ContainerSSHConnection(resp), nil
+}
+
+// ContainerGatewayTunnel returns a connection to a yamux session on
+// the controller. The caller should connect the returned resp.Conn to
+// a client-side yamux session.
+func (conn *Conn) ContainerGatewayTunnel(ctx context.Context, options arvados.ContainerGatewayTunnelOptions) (tunnelconn arvados.ConnectionResponse, err error) {
+ u, err := conn.baseURL.Parse("/" + strings.Replace(arvados.EndpointContainerGatewayTunnel.Path, "{uuid}", options.UUID, -1))
+ if err != nil {
+ err = fmt.Errorf("url.Parse: %w", err)
+ return
+ }
+ return conn.socket(ctx, u, "tunnel", url.Values{
+ "auth_secret": {options.AuthSecret},
+ })
+}
+
+// socket sets up a socket using the specified API endpoint and
+// upgrade header.
+func (conn *Conn) socket(ctx context.Context, u *url.URL, upgradeHeader string, postform url.Values) (connresp arvados.ConnectionResponse, err error) {
addr := conn.baseURL.Host
if strings.Index(addr, ":") < 1 || (strings.Contains(addr, "::") && addr[0] != '[') {
// hostname or ::1 or 1::1
@@ -354,15 +387,6 @@ func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSH
bufr := bufio.NewReader(netconn)
bufw := bufio.NewWriter(netconn)
- u, err := conn.baseURL.Parse("/" + strings.Replace(arvados.EndpointContainerSSH.Path, "{uuid}", options.UUID, -1))
- if err != nil {
- err = fmt.Errorf("tls.Dial: %w", err)
- return
- }
- u.RawQuery = url.Values{
- "detach_keys": {options.DetachKeys},
- "login_username": {options.LoginUsername},
- }.Encode()
tokens, err := conn.tokenProvider(ctx)
if err != nil {
return
@@ -370,11 +394,17 @@ func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSH
err = httpserver.ErrorWithStatus(errors.New("unauthorized"), http.StatusUnauthorized)
return
}
- bufw.WriteString("GET " + u.String() + " HTTP/1.1\r\n")
+ postdata := postform.Encode()
+ bufw.WriteString("POST " + u.String() + " HTTP/1.1\r\n")
bufw.WriteString("Authorization: Bearer " + tokens[0] + "\r\n")
bufw.WriteString("Host: " + u.Host + "\r\n")
- bufw.WriteString("Upgrade: ssh\r\n")
+ bufw.WriteString("Upgrade: " + upgradeHeader + "\r\n")
+ bufw.WriteString("Content-Type: application/x-www-form-urlencoded\r\n")
+ fmt.Fprintf(bufw, "Content-Length: %d\r\n", len(postdata))
bufw.WriteString("\r\n")
+ if len(postdata) > 0 {
+ bufw.WriteString(postdata)
+ }
bufw.Flush()
resp, err := http.ReadResponse(bufr, &http.Request{Method: "GET"})
if err != nil {
@@ -394,13 +424,13 @@ func (conn *Conn) ContainerSSH(ctx context.Context, options arvados.ContainerSSH
err = fmt.Errorf("server did not provide a tunnel: %s (HTTP %d)", message, resp.StatusCode)
return
}
- if strings.ToLower(resp.Header.Get("Upgrade")) != "ssh" ||
+ if strings.ToLower(resp.Header.Get("Upgrade")) != upgradeHeader ||
strings.ToLower(resp.Header.Get("Connection")) != "upgrade" {
err = fmt.Errorf("bad response from server: Upgrade %q Connection %q", resp.Header.Get("Upgrade"), resp.Header.Get("Connection"))
return
}
- sshconn.Conn = netconn
- sshconn.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
+ connresp.Conn = netconn
+ connresp.Bufrw = &bufio.ReadWriter{Reader: bufr, Writer: bufw}
return
}
diff --git a/lib/crunchrun/container_gateway.go b/lib/crunchrun/container_gateway.go
index 01457015e..49eb68c0f 100644
--- a/lib/crunchrun/container_gateway.go
+++ b/lib/crunchrun/container_gateway.go
@@ -14,16 +14,22 @@ import (
"io"
"net"
"net/http"
+ "net/url"
"os"
"os/exec"
"sync"
"syscall"
+ "time"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
"git.arvados.org/arvados.git/lib/selfsigned"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
"github.com/creack/pty"
"github.com/google/shlex"
+ "github.com/hashicorp/yamux"
"golang.org/x/crypto/ssh"
"golang.org/x/net/context"
)
@@ -45,12 +51,27 @@ func (GatewayTargetStub) InjectCommand(ctx context.Context, detachKeys, username
type Gateway struct {
ContainerUUID string
- Address string // listen host:port; if port=0, Start() will change it to the selected port
- AuthSecret string
- Target GatewayTarget
- Log interface {
+ // Caller should set Address to "", or "host:0" or "host:port"
+ // where host is a known external IP address; port is a
+ // desired port number to listen on; and ":0" chooses an
+ // available dynamic port.
+ //
+ // If Address is "", Start() listens only on the loopback
+ // interface (and changes Address to "127.0.0.1:port").
+ // Otherwise it listens on all interfaces.
+ //
+ // If Address is "host:0", Start() updates Address to
+ // "host:port".
+ Address string
+ AuthSecret string
+ Target GatewayTarget
+ Log interface {
Printf(fmt string, args ...interface{})
}
+ // If non-nil, set up a ContainerGatewayTunnel, so that the
+ // controller can connect to us even if our external IP
+ // address is unknown or not routable from controller.
+ ArvadosClient *arvados.Client
sshConfig ssh.ServerConfig
requestAuth string
@@ -99,7 +120,22 @@ func (gw *Gateway) Start() error {
// from arvados-controller, and PORT is either the desired
// port where we should run our gateway server, or "0" if we
// should choose an available port.
- host, port, err := net.SplitHostPort(gw.Address)
+ extAddr := gw.Address
+ // Generally we can't know which local interface corresponds
+ // to an externally reachable IP address, so if we expect to
+ // be reachable by external hosts, we listen on all
+ // interfaces.
+ listenHost := ""
+ if extAddr == "" {
+ // If the dispatcher doesn't tell us our external IP
+ // address, controller will only be able to connect
+ // through the tunnel (see runTunnel), so our gateway
+ // server only needs to listen on the loopback
+ // interface.
+ extAddr = "127.0.0.1:0"
+ listenHost = "127.0.0.1"
+ }
+ extHost, extPort, err := net.SplitHostPort(extAddr)
if err != nil {
return err
}
@@ -121,26 +157,87 @@ func (gw *Gateway) Start() error {
Certificates: []tls.Certificate{cert},
},
},
- Addr: ":" + port,
+ Addr: net.JoinHostPort(listenHost, extPort),
}
err = srv.Start()
if err != nil {
return err
}
- // Get the port number we are listening on (the port might be
+ // Get the port number we are listening on (extPort might be
// "0" or a port name, in which case this will be different).
- _, port, err = net.SplitHostPort(srv.Addr)
+ _, listenPort, err := net.SplitHostPort(srv.Addr)
if err != nil {
return err
}
- // When changing state to Running, we will set
- // gateway_address to "HOST:PORT" where HOST is our
- // external hostname/IP as provided by arvados-dispatch-cloud,
- // and PORT is the port number we ended up listening on.
- gw.Address = net.JoinHostPort(host, port)
+ // When changing state to Running, the caller will want to set
+ // gateway_address to a "HOST:PORT" that, if controller
+ // connects to it, will reach this gateway server.
+ //
+ // The most likely thing to work is: HOST is our external
+ // hostname/IP as provided by the caller
+ // (arvados-dispatch-cloud) or 127.0.0.1 to indicate
+ // non-tunnel connections aren't available; and PORT is the
+ // port number we are listening on.
+ gw.Address = net.JoinHostPort(extHost, listenPort)
+ if gw.ArvadosClient != nil {
+ go gw.maintainTunnel(gw.Address)
+ }
return nil
}
+func (gw *Gateway) maintainTunnel(addr string) {
+ for ; ; time.Sleep(5 * time.Second) {
+ err := gw.runTunnel(addr)
+ gw.Log.Printf("runTunnel: %s", err)
+ }
+}
+
+// runTunnel connects to controller and sets up a tunnel through
+// which controller can connect to the gateway server at the given
+// addr.
+func (gw *Gateway) runTunnel(addr string) error {
+ ctx := auth.NewContext(context.Background(), auth.NewCredentials(gw.ArvadosClient.AuthToken))
+ arpc := rpc.NewConn("", &url.URL{Scheme: "https", Host: gw.ArvadosClient.APIHost}, gw.ArvadosClient.Insecure, rpc.PassthroughTokenProvider)
+ tun, err := arpc.ContainerGatewayTunnel(ctx, arvados.ContainerGatewayTunnelOptions{
+ UUID: gw.ContainerUUID,
+ AuthSecret: gw.AuthSecret,
+ })
+ if err != nil {
+ return fmt.Errorf("error creating gateway tunnel: %s", err)
+ }
+ mux, err := yamux.Client(tun.Conn, nil)
+ if err != nil {
+ return fmt.Errorf("error setting up mux client end: %s", err)
+ }
+ for {
+ muxconn, err := mux.Accept()
+ if err != nil {
+ return err
+ }
+ gw.Log.Printf("receiving connection from tunnel, remoteAddr %s", muxconn.RemoteAddr().String())
+ go func() {
+ defer muxconn.Close()
+ gwconn, err := net.Dial("tcp", addr)
+ if err != nil {
+ gw.Log.Printf("error connecting to %s on behalf of tunnel connection: %s", addr, err)
+ return
+ }
+ defer gwconn.Close()
+ var wg sync.WaitGroup
+ wg.Add(2)
+ go func() {
+ defer wg.Done()
+ io.Copy(gwconn, muxconn)
+ }()
+ go func() {
+ defer wg.Done()
+ io.Copy(muxconn, gwconn)
+ }()
+ wg.Wait()
+ }()
+ }
+}
+
// handleSSH connects to an SSH server that allows the caller to run
// interactive commands as root (or any other desired user) inside the
// container. The tunnel itself can only be created by an
@@ -166,7 +263,7 @@ func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
// In future we'll handle browser traffic too, but for now the
// only traffic we expect is an SSH tunnel from
// (*lib/controller/localdb.Conn)ContainerSSH()
- if req.Method != "GET" || req.Header.Get("Upgrade") != "ssh" {
+ if req.Method != "POST" || req.Header.Get("Upgrade") != "ssh" {
http.Error(w, "path not found", http.StatusNotFound)
return
}
@@ -204,7 +301,9 @@ func (gw *Gateway) handleSSH(w http.ResponseWriter, req *http.Request) {
ctx := req.Context()
conn, newchans, reqs, err := ssh.NewServerConn(netconn, &gw.sshConfig)
- if err != nil {
+ if err == io.EOF {
+ return
+ } else if err != nil {
gw.Log.Printf("ssh.NewServerConn: %s", err)
return
}
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index 30871e734..693df171e 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -1911,6 +1911,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
ContainerUUID: containerUUID,
Target: cr.executor,
Log: cr.CrunchLog,
+ ArvadosClient: cr.dispatcherClient,
}
err = cr.gateway.Start()
if err != nil {
diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index d76ece1ed..1b303ffb4 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -47,7 +47,8 @@ var (
EndpointContainerDelete = APIEndpoint{"DELETE", "arvados/v1/containers/{uuid}", ""}
EndpointContainerLock = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/lock", ""}
EndpointContainerUnlock = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/unlock", ""}
- EndpointContainerSSH = APIEndpoint{"GET", "arvados/v1/connect/{uuid}/ssh", ""} // move to /containers after #17014 fixes routing
+ EndpointContainerSSH = APIEndpoint{"POST", "arvados/v1/connect/{uuid}/ssh", ""} // move to /containers after #17014 fixes routing
+ EndpointContainerGatewayTunnel = APIEndpoint{"POST", "arvados/v1/connect/{uuid}/gateway_tunnel", ""} // move to /containers after #17014 fixes routing
EndpointContainerRequestCreate = APIEndpoint{"POST", "arvados/v1/container_requests", "container_request"}
EndpointContainerRequestUpdate = APIEndpoint{"PATCH", "arvados/v1/container_requests/{uuid}", "container_request"}
EndpointContainerRequestGet = APIEndpoint{"GET", "arvados/v1/container_requests/{uuid}", ""}
@@ -98,10 +99,18 @@ type ContainerSSHOptions struct {
LoginUsername string `json:"login_username"`
}
-type ContainerSSHConnection struct {
- Conn net.Conn `json:"-"`
- Bufrw *bufio.ReadWriter `json:"-"`
- Logger logrus.FieldLogger `json:"-"`
+type ContainerSSHConnection ConnectionResponse
+
+type ConnectionResponse struct {
+ Conn net.Conn `json:"-"`
+ Bufrw *bufio.ReadWriter `json:"-"`
+ Logger logrus.FieldLogger `json:"-"`
+ UpgradeHeader string `json:"-"`
+}
+
+type ContainerGatewayTunnelOptions struct {
+ UUID string `json:"uuid"`
+ AuthSecret string `json:"auth_secret"`
}
type GetOptions struct {
@@ -255,6 +264,7 @@ type API interface {
ContainerLock(ctx context.Context, options GetOptions) (Container, error)
ContainerUnlock(ctx context.Context, options GetOptions) (Container, error)
ContainerSSH(ctx context.Context, options ContainerSSHOptions) (ContainerSSHConnection, error)
+ ContainerGatewayTunnel(ctx context.Context, options ContainerGatewayTunnelOptions) (ConnectionResponse, error)
ContainerRequestCreate(ctx context.Context, options CreateOptions) (ContainerRequest, error)
ContainerRequestUpdate(ctx context.Context, options UpdateOptions) (ContainerRequest, error)
ContainerRequestGet(ctx context.Context, options GetOptions) (ContainerRequest, error)
diff --git a/sdk/go/arvados/container_gateway.go b/sdk/go/arvados/container_gateway.go
index 00c98d572..d1d512856 100644
--- a/sdk/go/arvados/container_gateway.go
+++ b/sdk/go/arvados/container_gateway.go
@@ -14,14 +14,14 @@ import (
"github.com/sirupsen/logrus"
)
-func (sshconn ContainerSSHConnection) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+func (cresp ConnectionResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) {
hj, ok := w.(http.Hijacker)
if !ok {
http.Error(w, "ResponseWriter does not support connection upgrade", http.StatusInternalServerError)
return
}
w.Header().Set("Connection", "upgrade")
- w.Header().Set("Upgrade", "ssh")
+ w.Header().Set("Upgrade", cresp.UpgradeHeader)
w.WriteHeader(http.StatusSwitchingProtocols)
conn, bufrw, err := hj.Hijack()
if err != nil {
@@ -37,10 +37,10 @@ func (sshconn ContainerSSHConnection) ServeHTTP(w http.ResponseWriter, req *http
go func() {
defer wg.Done()
defer cancel()
- n, err := io.CopyN(conn, sshconn.Bufrw, int64(sshconn.Bufrw.Reader.Buffered()))
+ n, err := io.CopyN(conn, cresp.Bufrw, int64(cresp.Bufrw.Reader.Buffered()))
bytesOut += n
if err == nil {
- n, err = io.Copy(conn, sshconn.Conn)
+ n, err = io.Copy(conn, cresp.Conn)
bytesOut += n
}
if err != nil {
@@ -51,10 +51,10 @@ func (sshconn ContainerSSHConnection) ServeHTTP(w http.ResponseWriter, req *http
go func() {
defer wg.Done()
defer cancel()
- n, err := io.CopyN(sshconn.Conn, bufrw, int64(bufrw.Reader.Buffered()))
+ n, err := io.CopyN(cresp.Conn, bufrw, int64(bufrw.Reader.Buffered()))
bytesIn += n
if err == nil {
- n, err = io.Copy(sshconn.Conn, conn)
+ n, err = io.Copy(cresp.Conn, conn)
bytesIn += n
}
if err != nil {
@@ -62,10 +62,10 @@ func (sshconn ContainerSSHConnection) ServeHTTP(w http.ResponseWriter, req *http
}
}()
<-ctx.Done()
- if sshconn.Logger != nil {
+ if cresp.Logger != nil {
go func() {
wg.Wait()
- sshconn.Logger.WithFields(logrus.Fields{
+ cresp.Logger.WithFields(logrus.Fields{
"bytesIn": bytesIn,
"bytesOut": bytesOut,
}).Info("closed connection")
diff --git a/sdk/go/arvadostest/api.go b/sdk/go/arvadostest/api.go
index f49d29ce2..d784abf67 100644
--- a/sdk/go/arvadostest/api.go
+++ b/sdk/go/arvadostest/api.go
@@ -113,6 +113,10 @@ func (as *APIStub) ContainerSSH(ctx context.Context, options arvados.ContainerSS
as.appendCall(ctx, as.ContainerSSH, options)
return arvados.ContainerSSHConnection{}, as.Error
}
+func (as *APIStub) ContainerGatewayTunnel(ctx context.Context, options arvados.ContainerGatewayTunnelOptions) (arvados.ConnectionResponse, error) {
+ as.appendCall(ctx, as.ContainerGatewayTunnel, options)
+ return arvados.ConnectionResponse{}, as.Error
+}
func (as *APIStub) ContainerRequestCreate(ctx context.Context, options arvados.CreateOptions) (arvados.ContainerRequest, error) {
as.appendCall(ctx, as.ContainerRequestCreate, options)
return arvados.ContainerRequest{}, as.Error
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 3a04c5604..b35aca39e 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -498,9 +498,9 @@ class Container < ArvadosModel
permitted.push :priority
when Running
- permitted.push :priority, :output_properties, *progress_attrs
+ permitted.push :priority, :output_properties, :gateway_address, *progress_attrs
if self.state_changed?
- permitted.push :started_at, :gateway_address
+ permitted.push :started_at
end
if !self.interactive_session_started_was
permitted.push :interactive_session_started
diff --git a/services/api/test/unit/container_test.rb b/services/api/test/unit/container_test.rb
index ac3e6bea4..87613bdf0 100644
--- a/services/api/test/unit/container_test.rb
+++ b/services/api/test/unit/container_test.rb
@@ -960,6 +960,7 @@ class ContainerTest < ActiveSupport::TestCase
Thread.current[:user] = auth.user
end
+ assert c.update_attributes(gateway_address: "127.0.0.1:9")
assert c.update_attributes(output: collections(:collection_owned_by_active).portable_data_hash)
assert c.update_attributes(runtime_status: {'warning' => 'something happened'})
assert c.update_attributes(progress: 0.5)
commit 9e53cc300534f111426f466efefec4b13c4ca44d
Author: Tom Clegg <tom at curii.com>
Date: Fri Jun 17 13:41:50 2022 -0400
19166: Fix logging noise in test suite.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index 2c01b35ae..2b6684ff5 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -830,6 +830,7 @@ def setup_config():
"JobsAPI": {
"GitInternalDir": os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'internal.git'),
},
+ "LocalKeepBlobBuffersPerVCPU": 0,
"SupportedDockerImageFormats": {"v1": {}},
"ShellAccess": {
"Admin": True,
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list