[ARVADOS] updated: 1.2.0-36-ga695e615b
Git user
git at public.curoverse.com
Thu Aug 23 15:16:06 EDT 2018
Summary of changes:
lib/dispatchcloud/azure.go | 100 ++++++++++++++++++++++++++++++++--------
lib/dispatchcloud/azure_test.go | 48 +++++++++++++++++++
lib/dispatchcloud/provider.go | 22 +++++++++
3 files changed, 152 insertions(+), 18 deletions(-)
via a695e615b207f1cfe0cd1989e7b5f1b68c391329 (commit)
from 71efe208264f60c03cbb1994b8e9e44dadddff0e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit a695e615b207f1cfe0cd1989e7b5f1b68c391329
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Thu Aug 23 15:15:42 2018 -0400
13964: Detect and report rate limit and quota exceeded errors
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/lib/dispatchcloud/azure.go b/lib/dispatchcloud/azure.go
index 195bd8182..7f7e34fb1 100644
--- a/lib/dispatchcloud/azure.go
+++ b/lib/dispatchcloud/azure.go
@@ -9,6 +9,8 @@ import (
"fmt"
"log"
"net/http"
+ "regexp"
+ "strconv"
"sync"
"time"
@@ -17,6 +19,7 @@ import (
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
"github.com/Azure/azure-sdk-for-go/storage"
+ "github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/azure/auth"
"github.com/Azure/go-autorest/autorest/to"
@@ -60,23 +63,25 @@ func (cl *VirtualMachinesClientImpl) CreateOrUpdate(ctx context.Context,
future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, VMName, parameters)
if err != nil {
- return compute.VirtualMachine{}, err
+ return compute.VirtualMachine{}, WrapAzureError(err)
}
future.WaitForCompletionRef(ctx, cl.inner.Client)
- return future.Result(cl.inner)
+ r, err := future.Result(cl.inner)
+ return r, WrapAzureError(err)
}
func (cl *VirtualMachinesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
err = future.WaitForCompletionRef(ctx, cl.inner.Client)
- return future.Response(), err
+ return future.Response(), WrapAzureError(err)
}
func (cl *VirtualMachinesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
- return cl.inner.ListComplete(ctx, resourceGroupName)
+ r, err := cl.inner.ListComplete(ctx, resourceGroupName)
+ return r, WrapAzureError(err)
}
type InterfacesClientWrapper interface {
@@ -95,10 +100,10 @@ type InterfacesClientImpl struct {
func (cl *InterfacesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
err = future.WaitForCompletionRef(ctx, cl.inner.Client)
- return future.Response(), err
+ return future.Response(), WrapAzureError(err)
}
func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
@@ -108,14 +113,74 @@ func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, networkInterfaceName, parameters)
if err != nil {
- return network.Interface{}, err
+ return network.Interface{}, WrapAzureError(err)
}
future.WaitForCompletionRef(ctx, cl.inner.Client)
- return future.Result(cl.inner)
+ r, err := future.Result(cl.inner)
+ return r, WrapAzureError(err)
}
func (cl *InterfacesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
- return cl.inner.ListComplete(ctx, resourceGroupName)
+ r, err := cl.inner.ListComplete(ctx, resourceGroupName)
+ return r, WrapAzureError(err)
+}
+
+var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`)
+
+type AzureRateLimitError struct {
+ azure.RequestError
+ earliestRetry time.Time
+}
+
+func (ar *AzureRateLimitError) EarliestRetry() time.Time {
+ return ar.earliestRetry
+}
+
+type AzureQuotaError struct {
+ azure.RequestError
+}
+
+func (ar *AzureQuotaError) IsQuotaError() bool {
+ return true
+}
+
+func WrapAzureError(err error) error {
+ de, ok := err.(autorest.DetailedError)
+ if !ok {
+ return err
+ }
+ rq, ok := de.Original.(*azure.RequestError)
+ if !ok {
+ return err
+ }
+ if rq.Response == nil {
+ return err
+ }
+ if rq.Response.StatusCode == 429 || len(rq.Response.Header["Retry-After"]) >= 1 {
+ // API throttling
+ ra := rq.Response.Header["Retry-After"][0]
+ earliestRetry, parseErr := http.ParseTime(ra)
+ if parseErr != nil {
+ // Could not parse as a timestamp, must be number of seconds
+ dur, parseErr := strconv.ParseInt(ra, 10, 64)
+ if parseErr != nil {
+ earliestRetry = time.Now().Add(time.Duration(dur) * time.Second)
+ }
+ }
+ if parseErr != nil {
+ // Couldn't make sense of retry-after,
+ // so set retry to 20 seconds
+ earliestRetry = time.Now().Add(20 * time.Second)
+ }
+ return &AzureRateLimitError{*rq, earliestRetry}
+ }
+ if rq.ServiceError == nil {
+ return err
+ }
+ if quotaRe.FindString(rq.ServiceError.Code) != "" || quotaRe.FindString(rq.ServiceError.Message) != "" {
+ return &AzureQuotaError{*rq}
+ }
+ return err
}
type AzureProvider struct {
@@ -214,7 +279,7 @@ func (az *AzureProvider) Create(ctx context.Context,
}
nic, err := az.netClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name+"-nic", nicParameters)
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
log.Printf("Created NIC %v", *nic.ID)
@@ -283,7 +348,7 @@ func (az *AzureProvider) Create(ctx context.Context,
vm, err := az.vmClient.CreateOrUpdate(ctx, az.azconfig.ResourceGroup, name, vmParameters)
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
return &AzureInstance{
@@ -302,14 +367,14 @@ func (az *AzureProvider) Instances(ctx context.Context) ([]Instance, error) {
result, err := az.vmClient.ListComplete(ctx, az.azconfig.ResourceGroup)
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
instances := make([]Instance, 0)
for ; result.NotDone(); err = result.Next() {
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
if result.Value().Tags["arvados-class"] != nil &&
result.Value().Tags["arvados-instance-type"] != nil &&
@@ -327,7 +392,7 @@ func (az *AzureProvider) Instances(ctx context.Context) ([]Instance, error) {
func (az *AzureProvider) ManageNics(ctx context.Context) (map[string]network.Interface, error) {
result, err := az.netClient.ListComplete(ctx, az.azconfig.ResourceGroup)
if err != nil {
- return nil, err
+ return nil, WrapAzureError(err)
}
interfaces := make(map[string]network.Interface)
@@ -360,7 +425,7 @@ func (az *AzureProvider) ManageNics(ctx context.Context) (map[string]network.Int
for ; result.NotDone(); err = result.Next() {
if err != nil {
log.Printf("Error listing nics: %v", err)
- return interfaces, nil
+ return interfaces, WrapAzureError(nil)
}
if result.Value().Tags["arvados-class"] != nil &&
(*result.Value().Tags["arvados-class"]) == "crunch-dynamic-compute" {
@@ -481,8 +546,7 @@ func (ai *AzureInstance) GetTags() ([]InstanceTag, error) {
func (ai *AzureInstance) Destroy(ctx context.Context) error {
_, err := ai.provider.vmClient.Delete(ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name)
- // check response code?
- return err
+ return WrapAzureError(err)
}
func (ai *AzureInstance) Address() string {
diff --git a/lib/dispatchcloud/azure_test.go b/lib/dispatchcloud/azure_test.go
index bcba51bfd..0c8c8d534 100644
--- a/lib/dispatchcloud/azure_test.go
+++ b/lib/dispatchcloud/azure_test.go
@@ -14,6 +14,8 @@ import (
"git.curoverse.com/arvados.git/sdk/go/config"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
+ "github.com/Azure/go-autorest/autorest"
+ "github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/to"
check "gopkg.in/check.v1"
)
@@ -156,3 +158,49 @@ func (*AzureProviderSuite) TestDestroyInstances(c *check.C) {
c.Check(i.Destroy(context.Background()), check.IsNil)
}
}
+
+func (*AzureProviderSuite) TestDeleteFake(c *check.C) {
+ ap, _, _, err := GetProvider()
+ if err != nil {
+ c.Fatal("Error making provider", err)
+ }
+
+ _, err = ap.(*AzureProvider).netClient.Delete(context.Background(), "fakefakefake", "fakefakefake")
+
+ rq := err.(autorest.DetailedError).Original.(*azure.RequestError)
+
+ log.Printf("%v %q %q", rq.Response.StatusCode, rq.ServiceError.Code, rq.ServiceError.Message)
+}
+
+func (*AzureProviderSuite) TestWrapError(c *check.C) {
+ retryError := autorest.DetailedError{
+ Original: &azure.RequestError{
+ DetailedError: autorest.DetailedError{
+ Response: &http.Response{
+ StatusCode: 429,
+ Header: map[string][]string{"Retry-After": []string{"123"}},
+ },
+ },
+ ServiceError: &azure.ServiceError{},
+ },
+ }
+ wrapped := WrapAzureError(retryError)
+ _, ok := wrapped.(RateLimitError)
+ c.Check(ok, check.Equals, true)
+
+ quotaError := autorest.DetailedError{
+ Original: &azure.RequestError{
+ DetailedError: autorest.DetailedError{
+ Response: &http.Response{
+ StatusCode: 503,
+ },
+ },
+ ServiceError: &azure.ServiceError{
+ Message: "No more quota",
+ },
+ },
+ }
+ wrapped = WrapAzureError(quotaError)
+ _, ok = wrapped.(QuotaError)
+ c.Check(ok, check.Equals, true)
+}
diff --git a/lib/dispatchcloud/provider.go b/lib/dispatchcloud/provider.go
index c5411128a..d8b1ad6c0 100644
--- a/lib/dispatchcloud/provider.go
+++ b/lib/dispatchcloud/provider.go
@@ -6,10 +6,32 @@ package dispatchcloud
import (
"context"
+ "time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
)
+// A RateLimitError should be returned by a Provider when the cloud
+// service indicates it is rejecting all API calls for some time
+// interval.
+type RateLimitError interface {
+ // Time before which the caller should expect requests to
+ // fail.
+ EarliestRetry() time.Time
+ error
+}
+
+// A QuotaError should be returned by a Provider when the cloud
+// service indicates the account cannot create more VMs than already
+// exist.
+type QuotaError interface {
+ // If true, don't create more instances until some existing
+ // instances are destroyed. If false, don't handle the error
+ // as a quota error.
+ IsQuotaError() bool
+ error
+}
+
type InstanceTag string
type InstanceID string
type ImageID string
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list