Feature: Exponential Backoff in Retry Middleware

Daniel Adams · 2020-11-05 10:14:04 -05:00 · committed by GitHub
parent 3a8cb3f010
commit 74d1d55051
12 changed files with 218 additions and 44 deletions


@@ -9,17 +9,19 @@ TODO: add schema
The Retry middleware reissues a request a given number of times to a backend server if that server does not reply. As soon as the server answers, the middleware stops retrying, regardless of the response status. The Retry middleware can optionally apply an exponential backoff between attempts.
## Configuration Examples
```yaml tab="Docker"
# Retry to send request 4 times with exponential backoff
labels:
  - "traefik.http.middlewares.test-retry.retry.attempts=4"
  - "traefik.http.middlewares.test-retry.retry.initialinterval=100ms"
```
```yaml tab="Kubernetes"
# Retry to send request 4 times with exponential backoff
apiVersion: traefik.containo.us/v1alpha1
kind: Middleware
metadata:
  name: test-retry
spec:
  retry:
    attempts: 4
    initialInterval: 100ms
```
```yaml tab="Consul Catalog"
# Retry to send request 4 times with exponential backoff
- "traefik.http.middlewares.test-retry.retry.attempts=4"
- "traefik.http.middlewares.test-retry.retry.initialinterval=100ms"
```
```json tab="Marathon"
"labels": {
  "traefik.http.middlewares.test-retry.retry.attempts": "4",
  "traefik.http.middlewares.test-retry.retry.initialinterval": "100ms"
}
```
```yaml tab="Rancher"
# Retry to send request 4 times with exponential backoff
labels:
  - "traefik.http.middlewares.test-retry.retry.attempts=4"
  - "traefik.http.middlewares.test-retry.retry.initialinterval=100ms"
```
```toml tab="File (TOML)"
# Retry to send request 4 times with exponential backoff
[http.middlewares]
  [http.middlewares.test-retry.retry]
    attempts = 4
    initialInterval = "100ms"
```
```yaml tab="File (YAML)"
# Retry to send request 4 times with exponential backoff
http:
  middlewares:
    test-retry:
      retry:
        attempts: 4
        initialInterval: 100ms
```
## Configuration Options
### `attempts`
_mandatory_
The `attempts` option defines how many times the request should be retried.
### `initialInterval`
The `initialInterval` option defines the first wait time in the exponential backoff series. It can be provided in seconds or as a valid duration format (see [time.ParseDuration](https://golang.org/pkg/time/#ParseDuration)). The maximum interval is calculated as twice the `initialInterval`. If unspecified, requests are retried immediately, with no wait between attempts.
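
For illustration, here is a minimal sketch (not part of the documentation) of the wait series this option produces. It assumes the multiplier `2^(1/(attempts-1))` used by the middleware implementation later in this commit, which keeps every interval below twice the `initialInterval`:

```go
package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	attempts := 4
	initial := 100 * time.Millisecond // the configured initialInterval

	// Mirror the middleware's multiplier: after attempts-1 growth steps the
	// interval would reach exactly twice the initial value, so the waits
	// actually used always stay below that bound.
	multiplier := math.Pow(2, 1/float64(attempts-1))

	interval := initial
	for i := 1; i < attempts; i++ {
		fmt.Printf("wait before attempt %d: %v\n", i+1, interval)
		interval = time.Duration(float64(interval) * multiplier)
	}
	// Prints roughly 100ms, 126ms, 159ms — every wait stays below 2*initial.
}
```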


@@ -112,6 +112,7 @@
- "traefik.http.middlewares.middleware19.replacepathregex.regex=foobar"
- "traefik.http.middlewares.middleware19.replacepathregex.replacement=foobar"
- "traefik.http.middlewares.middleware20.retry.attempts=42"
- "traefik.http.middlewares.middleware20.retry.initialinterval=42"
- "traefik.http.middlewares.middleware21.stripprefix.forceslash=true"
- "traefik.http.middlewares.middleware21.stripprefix.prefixes=foobar, foobar"
- "traefik.http.middlewares.middleware22.stripprefixregex.regex=foobar, foobar"


@@ -261,6 +261,7 @@
[http.middlewares.Middleware20]
  [http.middlewares.Middleware20.retry]
    attempts = 42
    initialInterval = 42
[http.middlewares.Middleware21]
  [http.middlewares.Middleware21.stripPrefix]
    prefixes = ["foobar", "foobar"]


@@ -298,6 +298,7 @@ http:
    Middleware20:
      retry:
        attempts: 42
        initialInterval: 42
    Middleware21:
      stripPrefix:
        prefixes:


@@ -129,6 +129,7 @@
| `traefik/http/middlewares/Middleware19/replacePathRegex/regex` | `foobar` |
| `traefik/http/middlewares/Middleware19/replacePathRegex/replacement` | `foobar` |
| `traefik/http/middlewares/Middleware20/retry/attempts` | `42` |
| `traefik/http/middlewares/Middleware20/retry/initialInterval` | `42` |
| `traefik/http/middlewares/Middleware21/stripPrefix/forceSlash` | `true` |
| `traefik/http/middlewares/Middleware21/stripPrefix/prefixes/0` | `foobar` |
| `traefik/http/middlewares/Middleware21/stripPrefix/prefixes/1` | `foobar` |


@@ -111,6 +111,7 @@
"traefik.http.middlewares.middleware19.replacepathregex.regex": "foobar",
"traefik.http.middlewares.middleware19.replacepathregex.replacement": "foobar",
"traefik.http.middlewares.middleware20.retry.attempts": "42",
"traefik.http.middlewares.middleware20.retry.initialinterval": "42",
"traefik.http.middlewares.middleware21.stripprefix.forceslash": "true",
"traefik.http.middlewares.middleware21.stripprefix.prefixes": "foobar, foobar",
"traefik.http.middlewares.middleware22.stripprefixregex.regex": "foobar, foobar",


@@ -0,0 +1,45 @@
[global]
  checkNewVersion = false
  sendAnonymousUsage = false

[log]
  level = "DEBUG"

[entryPoints]
  [entryPoints.web]
    address = ":8000"

[api]
  insecure = true

[providers.file]
  filename = "{{ .SelfFilename }}"

## dynamic configuration ##

[http.routers]
  [http.routers.router1]
    service = "service1"
    middlewares = ["retry"]
    rule = "PathPrefix(`/`)"

[http.middlewares.retry.retry]
  attempts = 4
  initialInterval = "500ms"

[http.services]
  [http.services.service1]
    [http.services.service1.loadBalancer]
      [[http.services.service1.loadBalancer.servers]]
        url = "http://{{.WhoamiEndpoint}}:8080"
      [[http.services.service1.loadBalancer.servers]]
        url = "http://{{.WhoamiEndpoint}}:8081"
      [[http.services.service1.loadBalancer.servers]]
        url = "http://{{.WhoamiEndpoint}}:8082"
      [[http.services.service1.loadBalancer.servers]]
        url = "http://{{.WhoamiEndpoint}}:80"


@@ -34,10 +34,41 @@ func (s *RetrySuite) TestRetry(c *check.C) {
	err = try.GetRequest("http://127.0.0.1:8080/api/rawdata", 60*time.Second, try.BodyContains("PathPrefix(`/`)"))
	c.Assert(err, checker.IsNil)

	start := time.Now()
	// This simulates a DialTimeout when connecting to the backend server.
	response, err := http.Get("http://127.0.0.1:8000/")
	duration, allowed := time.Since(start), time.Millisecond*250
	c.Assert(err, checker.IsNil)
	c.Assert(response.StatusCode, checker.Equals, http.StatusOK)
	c.Assert(int64(duration), checker.LessThan, int64(allowed))
}

func (s *RetrySuite) TestRetryBackoff(c *check.C) {
	whoamiEndpoint := s.composeProject.Container(c, "whoami").NetworkSettings.IPAddress
	file := s.adaptFile(c, "fixtures/retry/backoff.toml", struct {
		WhoamiEndpoint string
	}{whoamiEndpoint})
	defer os.Remove(file)

	cmd, display := s.traefikCmd(withConfigFile(file))
	defer display(c)
	err := cmd.Start()
	c.Assert(err, checker.IsNil)
	defer s.killCmd(cmd)

	err = try.GetRequest("http://127.0.0.1:8080/api/rawdata", 60*time.Second, try.BodyContains("PathPrefix(`/`)"))
	c.Assert(err, checker.IsNil)

	start := time.Now()
	// This simulates a DialTimeout when connecting to the backend server.
	response, err := http.Get("http://127.0.0.1:8000/")
	duration := time.Since(start)
	// The test case delays are approximately 500 + 700 + 1000ms with randomization;
	// the total should safely exceed 1500ms.
	minAllowed := time.Millisecond * 1500

	c.Assert(err, checker.IsNil)
	c.Assert(response.StatusCode, checker.Equals, http.StatusOK)
	c.Assert(int64(duration), checker.GreaterThan, int64(minAllowed))
}
func (s *RetrySuite) TestRetryWebsocket(c *check.C) {


@@ -376,7 +376,8 @@ type ReplacePathRegex struct {
// Retry holds the retry configuration.
type Retry struct {
	Attempts        int             `json:"attempts,omitempty" toml:"attempts,omitempty" yaml:"attempts,omitempty"`
	InitialInterval ptypes.Duration `json:"initialInterval,omitempty" toml:"initialInterval,omitempty" yaml:"initialInterval,omitempty"`
}
// +k8s:deepcopy-gen=true


@@ -122,6 +122,7 @@ func TestDecodeConfiguration(t *testing.T) {
"traefik.http.middlewares.Middleware15.replacepathregex.regex": "foobar",
"traefik.http.middlewares.Middleware15.replacepathregex.replacement": "foobar",
"traefik.http.middlewares.Middleware16.retry.attempts": "42",
"traefik.http.middlewares.Middleware16.retry.initialinterval": "1s",
"traefik.http.middlewares.Middleware17.stripprefix.prefixes": "foobar, fiibar",
"traefik.http.middlewares.Middleware18.stripprefixregex.regex": "foobar, fiibar",
"traefik.http.middlewares.Middleware19.compress": "true",
@@ -416,7 +417,8 @@ func TestDecodeConfiguration(t *testing.T) {
			},
			"Middleware16": {
				Retry: &dynamic.Retry{
					Attempts:        42,
					InitialInterval: ptypes.Duration(time.Second),
				},
			},
			"Middleware17": {
@@ -884,7 +886,8 @@ func TestEncodeConfiguration(t *testing.T) {
			},
			"Middleware16": {
				Retry: &dynamic.Retry{
					Attempts:        42,
					InitialInterval: ptypes.Duration(time.Second),
				},
			},
			"Middleware17": {
@@ -1238,6 +1241,7 @@ func TestEncodeConfiguration(t *testing.T) {
"traefik.HTTP.Middlewares.Middleware15.ReplacePathRegex.Regex": "foobar",
"traefik.HTTP.Middlewares.Middleware15.ReplacePathRegex.Replacement": "foobar",
"traefik.HTTP.Middlewares.Middleware16.Retry.Attempts": "42",
"traefik.HTTP.Middlewares.Middleware16.Retry.InitialInterval": "1000000000",
"traefik.HTTP.Middlewares.Middleware17.StripPrefix.Prefixes": "foobar, fiibar",
"traefik.HTTP.Middlewares.Middleware17.StripPrefix.ForceSlash": "true",
"traefik.HTTP.Middlewares.Middleware18.StripPrefixRegex.Regex": "foobar, fiibar",


@@ -5,10 +5,13 @@ import (
"context"
"fmt"
"io/ioutil"
"math"
"net"
"net/http"
"net/http/httptrace"
"time"
"github.com/cenkalti/backoff/v4"
"github.com/opentracing/opentracing-go/ext"
"github.com/traefik/traefik/v2/pkg/config/dynamic"
"github.com/traefik/traefik/v2/pkg/log"
@@ -34,12 +37,18 @@ type Listener interface {
// each of them about a retry attempt.
type Listeners []Listener

// nexter returns the duration to wait before retrying the operation.
type nexter interface {
	NextBackOff() time.Duration
}
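
Both strategies returned by `newBackOff` (below) satisfy this one-method interface. A compile-time sketch, illustrative and not part of the commit, valid only inside this package since `nexter` is unexported:

```go
var _ nexter = &backoff.ZeroBackOff{}          // NextBackOff() always returns 0: retry immediately
var _ nexter = backoff.NewExponentialBackOff() // NextBackOff() grows the wait between attempts
```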
// retry is a middleware that retries requests.
type retry struct {
	attempts        int
	initialInterval time.Duration
	next            http.Handler
	listener        Listener
	name            string
}
// New returns a new retry middleware.
@@ -51,10 +60,11 @@ func New(ctx context.Context, next http.Handler, config dynamic.Retry, listener
	}

	return &retry{
		attempts:        config.Attempts,
		initialInterval: time.Duration(config.InitialInterval),
		next:            next,
		listener:        listener,
		name:            name,
	}, nil
}
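
A minimal usage sketch of the constructor (assumptions: the import path `github.com/traefik/traefik/v2/pkg/middlewares/retry` and the stand-in `upstream` handler are mine; Traefik normally builds this middleware from dynamic configuration rather than by hand):

```go
package main

import (
	"context"
	"log"
	"net/http"
	"time"

	ptypes "github.com/traefik/paerser/types"
	"github.com/traefik/traefik/v2/pkg/config/dynamic"
	"github.com/traefik/traefik/v2/pkg/middlewares/retry"
)

func main() {
	// Stand-in for the handler the middleware would normally wrap.
	upstream := http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
		rw.WriteHeader(http.StatusOK)
	})

	cfg := dynamic.Retry{
		Attempts:        4,
		InitialInterval: ptypes.Duration(100 * time.Millisecond),
	}

	// retry.Listeners{} means nothing is notified about retry attempts.
	handler, err := retry.New(context.Background(), upstream, cfg, retry.Listeners{}, "my-retry")
	if err != nil {
		log.Fatal(err)
	}

	log.Fatal(http.ListenAndServe(":8080", handler))
}
```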
@@ -72,36 +82,65 @@ func (r *retry) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
	}

	attempts := 1

	backOff := r.newBackOff()
	currentInterval := 0 * time.Millisecond

	for {
		select {
		case <-time.After(currentInterval):
			shouldRetry := attempts < r.attempts
			retryResponseWriter := newResponseWriter(rw, shouldRetry)

			// Disable retries when the backend already received request data.
			trace := &httptrace.ClientTrace{
				WroteHeaders: func() {
					retryResponseWriter.DisableRetries()
				},
				WroteRequest: func(httptrace.WroteRequestInfo) {
					retryResponseWriter.DisableRetries()
				},
			}

			newCtx := httptrace.WithClientTrace(req.Context(), trace)
			r.next.ServeHTTP(retryResponseWriter, req.WithContext(newCtx))

			if !retryResponseWriter.ShouldRetry() {
				return
			}

			currentInterval = backOff.NextBackOff()
			attempts++

			log.FromContext(middlewares.GetLoggerCtx(req.Context(), r.name, typeName)).
				Debugf("New attempt %d for request: %v", attempts, req.URL)

			r.listener.Retried(req, attempts)

		case <-req.Context().Done():
			return
		}
	}
}
func (r *retry) newBackOff() nexter {
	if r.attempts < 2 || r.initialInterval <= 0 {
		return &backoff.ZeroBackOff{}
	}

	b := backoff.NewExponentialBackOff()
	b.InitialInterval = r.initialInterval

	// Derive the multiplier from the configured number of attempts so that,
	// across all attempts, the interval never exceeds twice the initial
	// interval; this keeps the progression proportional to the attempt budget.
	b.Multiplier = math.Pow(2, 1/float64(r.attempts-1))

	// According to the backoff package docs, Reset() must be called before use.
	b.Reset()
	return b
}
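
To make the multiplier choice concrete: with `attempts = 4` the multiplier is `2^(1/3) ≈ 1.26`, so a 100ms initial interval yields pre-jitter waits of roughly 100ms, 126ms, and 159ms. A sketch of an internal test pinning down that property (assumed, not part of this commit; it relies on `backoff/v4` returning the un-jittered interval when `RandomizationFactor` is zero):

```go
func TestNewBackOffStaysUnderTwiceInitial(t *testing.T) {
	r := &retry{attempts: 4, initialInterval: 100 * time.Millisecond}

	b, ok := r.newBackOff().(*backoff.ExponentialBackOff)
	if !ok {
		t.Fatal("expected an exponential backoff")
	}
	// Disable jitter so NextBackOff is deterministic for the assertion.
	b.RandomizationFactor = 0

	for i := 0; i < r.attempts-1; i++ {
		if next := b.NextBackOff(); next >= 2*r.initialInterval {
			t.Fatalf("interval %v reached twice the initial interval", next)
		}
	}
}
```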
// Retried exists to implement the Listener interface. It calls Retried on each of its slice entries.
func (l Listeners) Retried(req *http.Request, attempt int) {
	for _, listener := range l {


@@ -9,10 +9,12 @@ import (
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/gorilla/websocket"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	ptypes "github.com/traefik/paerser/types"
	"github.com/traefik/traefik/v2/pkg/config/dynamic"
	"github.com/traefik/traefik/v2/pkg/middlewares/emptybackendhandler"
	"github.com/traefik/traefik/v2/pkg/testhelpers"
@@ -35,6 +37,13 @@ func TestRetry(t *testing.T) {
			wantResponseStatus:    http.StatusOK,
			amountFaultyEndpoints: 0,
		},
		{
			desc:                  "no retry on success with backoff",
			config:                dynamic.Retry{Attempts: 1, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
			wantRetryAttempts:     0,
			wantResponseStatus:    http.StatusOK,
			amountFaultyEndpoints: 0,
		},
		{
			desc:                  "no retry when max request attempts is one",
			config:                dynamic.Retry{Attempts: 1},
@@ -42,6 +51,13 @@
			wantResponseStatus:    http.StatusBadGateway,
			amountFaultyEndpoints: 1,
		},
		{
			desc:                  "no retry when max request attempts is one with backoff",
			config:                dynamic.Retry{Attempts: 1, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
			wantRetryAttempts:     0,
			wantResponseStatus:    http.StatusBadGateway,
			amountFaultyEndpoints: 1,
		},
		{
			desc:                  "one retry when one server is faulty",
			config:                dynamic.Retry{Attempts: 2},
@@ -49,6 +65,13 @@
			wantResponseStatus:    http.StatusOK,
			amountFaultyEndpoints: 1,
		},
		{
			desc:                  "one retry when one server is faulty with backoff",
			config:                dynamic.Retry{Attempts: 2, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
			wantRetryAttempts:     1,
			wantResponseStatus:    http.StatusOK,
			amountFaultyEndpoints: 1,
		},
		{
			desc:                  "two retries when two servers are faulty",
			config:                dynamic.Retry{Attempts: 3},
@@ -56,6 +79,13 @@
			wantResponseStatus:    http.StatusOK,
			amountFaultyEndpoints: 2,
		},
		{
			desc:                  "two retries when two servers are faulty with backoff",
			config:                dynamic.Retry{Attempts: 3, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
			wantRetryAttempts:     2,
			wantResponseStatus:    http.StatusOK,
			amountFaultyEndpoints: 2,
		},
		{
			desc:                  "max attempts exhausted delivers the 5xx response",
			config:                dynamic.Retry{Attempts: 3},
@@ -63,6 +93,13 @@
			wantResponseStatus:    http.StatusBadGateway,
			amountFaultyEndpoints: 3,
		},
		{
			desc:                  "max attempts exhausted delivers the 5xx response with backoff",
			config:                dynamic.Retry{Attempts: 3, InitialInterval: ptypes.Duration(time.Microsecond * 50)},
			wantRetryAttempts:     2,
			wantResponseStatus:    http.StatusBadGateway,
			amountFaultyEndpoints: 3,
		},
	}
backendServer := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {