From 1c98a9ad3edf26a887b29d09246035f3fc8268e7 Mon Sep 17 00:00:00 2001 From: Timo Reimann Date: Tue, 26 Sep 2017 10:22:03 +0200 Subject: [PATCH] Add request accepting grace period delaying graceful shutdown. --- cmd/traefik/configuration.go | 7 ++- cmd/traefik/traefik.go | 32 +--------- configuration/configuration.go | 61 ++++++++++++++++++- configuration/configuration_test.go | 50 +++++++++++++++ docs/configuration/commons.md | 42 ++++++++++++- integration/basic_test.go | 61 +++++++++++++++++++ integration/fixtures/reqacceptgrace.toml | 22 +++++++ .../resources/compose/reqacceptgrace.yml | 2 + server/server.go | 4 +- server/server_signals.go | 8 ++- 10 files changed, 251 insertions(+), 38 deletions(-) create mode 100644 integration/fixtures/reqacceptgrace.toml create mode 100644 integration/resources/compose/reqacceptgrace.yml diff --git a/cmd/traefik/configuration.go b/cmd/traefik/configuration.go index cc127965f..a27ca1e05 100644 --- a/cmd/traefik/configuration.go +++ b/cmd/traefik/configuration.go @@ -180,6 +180,11 @@ func NewTraefikDefaultPointersConfiguration() *TraefikConfiguration { DialTimeout: flaeg.Duration(configuration.DefaultDialTimeout), } + // default LifeCycle + defaultLifeycle := configuration.LifeCycle{ + GraceTimeOut: flaeg.Duration(configuration.DefaultGraceTimeout), + } + defaultConfiguration := configuration.GlobalConfiguration{ Docker: &defaultDocker, File: &defaultFile, @@ -202,6 +207,7 @@ func NewTraefikDefaultPointersConfiguration() *TraefikConfiguration { ForwardingTimeouts: &forwardingTimeouts, TraefikLog: &defaultTraefikLog, AccessLog: &defaultAccessLog, + LifeCycle: &defaultLifeycle, } return &TraefikConfiguration{ @@ -213,7 +219,6 @@ func NewTraefikDefaultPointersConfiguration() *TraefikConfiguration { func NewTraefikConfiguration() *TraefikConfiguration { return &TraefikConfiguration{ GlobalConfiguration: configuration.GlobalConfiguration{ - GraceTimeOut: flaeg.Duration(10 * time.Second), AccessLogsFile: "", TraefikLogsFile: "", 
LogLevel: "ERROR", diff --git a/cmd/traefik/traefik.go b/cmd/traefik/traefik.go index 23e097bc8..a8b96205d 100644 --- a/cmd/traefik/traefik.go +++ b/cmd/traefik/traefik.go @@ -22,7 +22,6 @@ import ( "github.com/containous/traefik/log" "github.com/containous/traefik/provider/ecs" "github.com/containous/traefik/provider/kubernetes" - "github.com/containous/traefik/provider/rancher" "github.com/containous/traefik/safe" "github.com/containous/traefik/server" "github.com/containous/traefik/types" @@ -228,36 +227,7 @@ func run(globalConfiguration *configuration.GlobalConfiguration) { http.DefaultTransport.(*http.Transport).Proxy = http.ProxyFromEnvironment - if len(globalConfiguration.EntryPoints) == 0 { - globalConfiguration.EntryPoints = map[string]*configuration.EntryPoint{"http": {Address: ":80"}} - globalConfiguration.DefaultEntryPoints = []string{"http"} - } - - if globalConfiguration.Rancher != nil { - // Ensure backwards compatibility for now - if len(globalConfiguration.Rancher.AccessKey) > 0 || - len(globalConfiguration.Rancher.Endpoint) > 0 || - len(globalConfiguration.Rancher.SecretKey) > 0 { - - if globalConfiguration.Rancher.API == nil { - globalConfiguration.Rancher.API = &rancher.APIConfiguration{ - AccessKey: globalConfiguration.Rancher.AccessKey, - SecretKey: globalConfiguration.Rancher.SecretKey, - Endpoint: globalConfiguration.Rancher.Endpoint, - } - } - log.Warn("Deprecated configuration found: rancher.[accesskey|secretkey|endpoint]. 
" + - "Please use rancher.api.[accesskey|secretkey|endpoint] instead.") - } - - if globalConfiguration.Rancher.Metadata != nil && len(globalConfiguration.Rancher.Metadata.Prefix) == 0 { - globalConfiguration.Rancher.Metadata.Prefix = "latest" - } - } - - if globalConfiguration.Debug { - globalConfiguration.LogLevel = "DEBUG" - } + globalConfiguration.SetEffectiveConfiguration() // logging level, err := logrus.ParseLevel(strings.ToLower(globalConfiguration.LogLevel)) diff --git a/configuration/configuration.go b/configuration/configuration.go index 088537321..8345eafdf 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -11,6 +11,7 @@ import ( "github.com/containous/flaeg" "github.com/containous/traefik/acme" + "github.com/containous/traefik/log" "github.com/containous/traefik/provider/boltdb" "github.com/containous/traefik/provider/consul" "github.com/containous/traefik/provider/docker" @@ -37,12 +38,17 @@ const ( // DefaultIdleTimeout before closing an idle connection. DefaultIdleTimeout = 180 * time.Second + + // DefaultGraceTimeout controls how long Traefik serves pending requests + // prior to shutting down. + DefaultGraceTimeout = 10 * time.Second ) // GlobalConfiguration holds global configuration (with providers, etc.). // It's populated from the traefik configuration file passed as an argument to the binary. 
type GlobalConfiguration struct { - GraceTimeOut flaeg.Duration `short:"g" description:"Duration to give active requests a chance to finish before Traefik stops"` + LifeCycle *LifeCycle `description:"Timeouts influencing the server life cycle"` + GraceTimeOut flaeg.Duration `short:"g" description:"(Deprecated) Duration to give active requests a chance to finish before Traefik stops"` // Deprecated Debug bool `short:"d" description:"Enable debug mode"` CheckNewVersion bool `description:"Periodically check if a new version has been released"` AccessLogsFile string `description:"(Deprecated) Access logs file"` // Deprecated @@ -81,6 +87,52 @@ type GlobalConfiguration struct { DynamoDB *dynamodb.Provider `description:"Enable DynamoDB backend with default settings"` } +// SetEffectiveConfiguration adds missing configuration parameters derived from +// existing ones. It also takes care of maintaining backwards compatibility. +func (gc *GlobalConfiguration) SetEffectiveConfiguration() { + if len(gc.EntryPoints) == 0 { + gc.EntryPoints = map[string]*EntryPoint{"http": {Address: ":80"}} + gc.DefaultEntryPoints = []string{"http"} + } + + // Make sure LifeCycle isn't nil to spare nil checks elsewhere. + if gc.LifeCycle == nil { + gc.LifeCycle = &LifeCycle{} + } + + // Prefer legacy grace timeout parameter for backwards compatibility reasons. + if gc.GraceTimeOut > 0 { + log.Warn("top-level grace period configuration has been deprecated -- please use lifecycle grace period") + gc.LifeCycle.GraceTimeOut = gc.GraceTimeOut + } + + if gc.Rancher != nil { + // Ensure backwards compatibility for now + if len(gc.Rancher.AccessKey) > 0 || + len(gc.Rancher.Endpoint) > 0 || + len(gc.Rancher.SecretKey) > 0 { + + if gc.Rancher.API == nil { + gc.Rancher.API = &rancher.APIConfiguration{ + AccessKey: gc.Rancher.AccessKey, + SecretKey: gc.Rancher.SecretKey, + Endpoint: gc.Rancher.Endpoint, + } + } + log.Warn("Deprecated configuration found: rancher.[accesskey|secretkey|endpoint]. 
" + + "Please use rancher.api.[accesskey|secretkey|endpoint] instead.") + } + + if gc.Rancher.Metadata != nil && len(gc.Rancher.Metadata.Prefix) == 0 { + gc.Rancher.Metadata.Prefix = "latest" + } + } + + if gc.Debug { + gc.LogLevel = "DEBUG" + } +} + // DefaultEntryPoints holds default entry points type DefaultEntryPoints []string @@ -446,3 +498,10 @@ type ForwardingTimeouts struct { DialTimeout flaeg.Duration `description:"The amount of time to wait until a connection to a backend server can be established. Defaults to 30 seconds. If zero, no timeout exists"` ResponseHeaderTimeout flaeg.Duration `description:"The amount of time to wait for a server's response headers after fully writing the request (including its body, if any). If zero, no timeout exists"` } + +// LifeCycle contains configurations relevant to the lifecycle (such as the +// shutdown phase) of Traefik. +type LifeCycle struct { + RequestAcceptGraceTimeout flaeg.Duration `description:"Duration to keep accepting requests before Traefik initiates the graceful shutdown procedure"` + GraceTimeOut flaeg.Duration `description:"Duration to give active requests a chance to finish before Traefik stops"` +} diff --git a/configuration/configuration_test.go b/configuration/configuration_test.go index 5d476e4df..ed20c6b02 100644 --- a/configuration/configuration_test.go +++ b/configuration/configuration_test.go @@ -3,7 +3,9 @@ package configuration import ( "fmt" "testing" + "time" + "github.com/containous/flaeg" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -199,3 +201,51 @@ func TestEntryPoints_Set(t *testing.T) { }) } } + +func TestSetEffecticeConfiguration(t *testing.T) { + tests := []struct { + desc string + legacyGraceTimeout time.Duration + lifeCycleGraceTimeout time.Duration + wantGraceTimeout time.Duration + }{ + { + desc: "legacy grace timeout given only", + legacyGraceTimeout: 5 * time.Second, + wantGraceTimeout: 5 * time.Second, + }, + { + desc: "legacy and life cycle 
grace timeouts given", + legacyGraceTimeout: 5 * time.Second, + lifeCycleGraceTimeout: 12 * time.Second, + wantGraceTimeout: 5 * time.Second, + }, + { + desc: "legacy grace timeout omitted", + legacyGraceTimeout: 0, + lifeCycleGraceTimeout: 12 * time.Second, + wantGraceTimeout: 12 * time.Second, + }, + } + + for _, test := range tests { + test := test + t.Run(test.desc, func(t *testing.T) { + t.Parallel() + gc := &GlobalConfiguration{ + GraceTimeOut: flaeg.Duration(test.legacyGraceTimeout), + } + if test.lifeCycleGraceTimeout > 0 { + gc.LifeCycle = &LifeCycle{ + GraceTimeOut: flaeg.Duration(test.lifeCycleGraceTimeout), + } + } + + gc.SetEffectiveConfiguration() + gotGraceTimeout := time.Duration(gc.LifeCycle.GraceTimeOut) + if gotGraceTimeout != test.wantGraceTimeout { + t.Fatalf("got effective grace timeout %d, want %d", gotGraceTimeout, test.wantGraceTimeout) + } + }) + } +} diff --git a/docs/configuration/commons.md b/docs/configuration/commons.md index bbeda32c2..e4252f913 100644 --- a/docs/configuration/commons.md +++ b/docs/configuration/commons.md @@ -3,10 +3,16 @@ ## Main Section ```toml -# Duration to give active requests a chance to finish before Traefik stops. +# DEPRECATED - for general usage instruction see [lifeCycle.graceTimeOut]. +# +# If both the deprecated option and the new one are given, the deprecated one +# takes precedence. +# A value of zero is equivalent to omitting the parameter, causing +# [lifeCycle.graceTimeOut] to be effective. Pass zero to the new option in +# order to disable the grace period. # # Optional -# Default: "10s" +# Default: "0s" # # graceTimeOut = "10s" @@ -303,6 +309,38 @@ Given provider-specific support, the value may be overridden on a per-backend ba Can be provided in a format supported by [time.ParseDuration](https://golang.org/pkg/time/#ParseDuration) or as raw values (digits). If no units are provided, the value is parsed assuming seconds. 
+## Life Cycle + +Controls the behavior of Traefik during the shutdown phase. + +```toml +[lifeCycle] + +# Duration to keep accepting requests prior to initiating the graceful +# termination period (as defined by the `graceTimeOut` option). This +# option is meant to give downstream load-balancers sufficient time to +# take Traefik out of rotation. +# Can be provided in a format supported by [time.ParseDuration](https://golang.org/pkg/time/#ParseDuration) or as raw values (digits). +# If no units are provided, the value is parsed assuming seconds. +# A zero duration disables the request accepting grace period, i.e., +# Traefik proceeds immediately to the graceful termination period. +# +# Optional +# Default: "0s" +# +# requestAcceptGraceTimeout = "10s" + +# Duration to give active requests a chance to finish before Traefik stops. +# Can be provided in a format supported by [time.ParseDuration](https://golang.org/pkg/time/#ParseDuration) or as raw values (digits). +# If no units are provided, the value is parsed assuming seconds. +# Note: in this time frame no new requests are accepted. 
+# +# Optional +# Default: "10s" +# +# graceTimeOut = "10s" +``` + ## Timeouts ### Responding Timeouts diff --git a/integration/basic_test.go b/integration/basic_test.go index dbea008d9..128c99d75 100644 --- a/integration/basic_test.go +++ b/integration/basic_test.go @@ -3,7 +3,9 @@ package integration import ( "fmt" "net/http" + "os" "strings" + "syscall" "time" "github.com/containous/traefik/integration/try" @@ -101,3 +103,62 @@ func (s *SimpleSuite) TestPrintHelp(c *check.C) { }) c.Assert(err, checker.IsNil) } + +func (s *SimpleSuite) TestRequestAcceptGraceTimeout(c *check.C) { + s.createComposeProject(c, "reqacceptgrace") + s.composeProject.Start(c) + + whoami := "http://" + s.composeProject.Container(c, "whoami").NetworkSettings.IPAddress + ":80" + + file := s.adaptFile(c, "fixtures/reqacceptgrace.toml", struct { + Server string + }{whoami}) + defer os.Remove(file) + cmd, display := s.traefikCmd(withConfigFile(file)) + defer display(c) + err := cmd.Start() + c.Assert(err, checker.IsNil) + defer cmd.Process.Kill() + + // Wait for Traefik to turn ready. + err = try.GetRequest("http://127.0.0.1:8000/", 2*time.Second, try.StatusCodeIs(http.StatusNotFound)) + c.Assert(err, checker.IsNil) + + // Make sure exposed service is ready. + err = try.GetRequest("http://127.0.0.1:8000/service", 3*time.Second, try.StatusCodeIs(http.StatusOK)) + c.Assert(err, checker.IsNil) + + // Send SIGTERM to Traefik. + proc, err := os.FindProcess(cmd.Process.Pid) + c.Assert(err, checker.IsNil) + err = proc.Signal(syscall.SIGTERM) + c.Assert(err, checker.IsNil) + + // Give Traefik time to process the SIGTERM and send a request half-way + // into the request accepting grace period, by which requests should + // still get served. 
+ time.Sleep(5 * time.Second) + resp, err := http.Get("http://127.0.0.1:8000/service") + c.Assert(err, checker.IsNil) + defer resp.Body.Close() + c.Assert(resp.StatusCode, checker.Equals, http.StatusOK) + + // Expect Traefik to shut down gracefully once the request accepting grace + // period has elapsed. + waitErr := make(chan error) + go func() { + waitErr <- cmd.Wait() + }() + + select { + case err := <-waitErr: + c.Assert(err, checker.IsNil) + case <-time.After(10 * time.Second): + // By now we are ~5 seconds out of the request accepting grace period + // (start + 5 seconds sleep prior to the mid-grace period request + + // 10 seconds timeout = 15 seconds > 10 seconds grace period). + // Something must have gone wrong if we still haven't terminated at + // this point. + c.Fatal("Traefik did not terminate in time") + } +} diff --git a/integration/fixtures/reqacceptgrace.toml b/integration/fixtures/reqacceptgrace.toml new file mode 100644 index 000000000..f7937a3fc --- /dev/null +++ b/integration/fixtures/reqacceptgrace.toml @@ -0,0 +1,22 @@ +defaultEntryPoints = ["http"] + +logLevel = "DEBUG" + +[entryPoints] + [entryPoints.http] + address = ":8000" + +[lifeCycle] + requestAcceptGraceTimeout = "10s" + +[file] +[backends] + [backends.backend] + [backends.backend.servers.server] + url = "{{.Server}}" + +[frontends] + [frontends.frontend] + backend = "backend" + [frontends.frontend.routes.service] + rule = "Path:/service" diff --git a/integration/resources/compose/reqacceptgrace.yml b/integration/resources/compose/reqacceptgrace.yml new file mode 100644 index 000000000..88f530b86 --- /dev/null +++ b/integration/resources/compose/reqacceptgrace.yml @@ -0,0 +1,2 @@ +whoami: + image: emilevauge/whoami diff --git a/server/server.go b/server/server.go index 3497dac91..bf9581e00 100644 --- a/server/server.go +++ b/server/server.go @@ -203,7 +203,7 @@ func (server *Server) Stop() { wg.Add(1) go func(serverEntryPointName string, serverEntryPoint *serverEntryPoint) { defer 
wg.Done() - graceTimeOut := time.Duration(server.globalConfiguration.GraceTimeOut) + graceTimeOut := time.Duration(server.globalConfiguration.LifeCycle.GraceTimeOut) ctx, cancel := context.WithTimeout(context.Background(), graceTimeOut) log.Debugf("Waiting %s seconds before killing connections on entrypoint %s...", graceTimeOut, serverEntryPointName) if err := serverEntryPoint.httpServer.Shutdown(ctx); err != nil { @@ -220,7 +220,7 @@ func (server *Server) Stop() { // Close destroys the server func (server *Server) Close() { - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(server.globalConfiguration.GraceTimeOut)) + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(server.globalConfiguration.LifeCycle.GraceTimeOut)) go func(ctx context.Context) { <-ctx.Done() if ctx.Err() == context.Canceled { diff --git a/server/server_signals.go b/server/server_signals.go index 7de108b67..83a9769d5 100644 --- a/server/server_signals.go +++ b/server/server_signals.go @@ -5,6 +5,7 @@ package server import ( "os/signal" "syscall" + "time" "github.com/containous/traefik/log" ) @@ -31,7 +32,12 @@ func (server *Server) listenSignals() { } default: log.Infof("I have to go... %+v", sig) - log.Info("Stopping server") + reqAcceptGraceTimeOut := time.Duration(server.globalConfiguration.LifeCycle.RequestAcceptGraceTimeout) + if reqAcceptGraceTimeOut > 0 { + log.Infof("Waiting %s for incoming requests to cease", reqAcceptGraceTimeOut) + time.Sleep(reqAcceptGraceTimeOut) + } + log.Info("Stopping server gracefully") server.Stop() } }