Remove config reload failure metrics

This commit is contained in:
Romain 2023-03-20 15:14:05 +01:00 committed by GitHub
parent 20e47d9102
commit 598a257ae1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 3 additions and 79 deletions

View file

@ -19,9 +19,7 @@ var (
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
const (
ddConfigReloadsName = "config.reload.total"
ddConfigReloadsFailureTagName = "failure"
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
ddEntryPointReqsName = "entrypoint.request.total"
@ -64,9 +62,7 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
registry := &standardRegistry{
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
tlsCertsNotAfterTimestampGauge: datadogClient.NewGauge(ddTLSCertsNotAfterTimestampName),
}

View file

@ -45,9 +45,7 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg
expected := []string{
metricsPrefix + ".config.reload.total:1.000000|c\n",
metricsPrefix + ".config.reload.total:1.000000|c|#failure:true\n",
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
metricsPrefix + ".config.reload.lastFailureTimestamp:1.000000|g\n",
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g|#key:value\n",
@ -80,9 +78,7 @@ func testDatadogRegistry(t *testing.T, metricsPrefix string, datadogRegistry Reg
udp.ShouldReceiveAll(t, expected, func() {
datadogRegistry.ConfigReloadsCounter().Add(1)
datadogRegistry.ConfigReloadsFailureCounter().Add(1)
datadogRegistry.LastConfigReloadSuccessGauge().Add(1)
datadogRegistry.LastConfigReloadFailureGauge().Add(1)
datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)

View file

@ -25,9 +25,7 @@ var (
const (
influxDBConfigReloadsName = "traefik.config.reload.total"
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
influxDBTLSCertsNotAfterTimestampName = "traefik.tls.certs.notAfterTimestamp"
@ -84,9 +82,7 @@ func RegisterInfluxDB2(ctx context.Context, config *types.InfluxDB2) Registry {
registry := &standardRegistry{
configReloadsCounter: influxDB2Store.NewCounter(influxDBConfigReloadsName),
configReloadsFailureCounter: influxDB2Store.NewCounter(influxDBConfigReloadsFailureName),
lastConfigReloadSuccessGauge: influxDB2Store.NewGauge(influxDBLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: influxDB2Store.NewGauge(influxDBLastConfigReloadFailureName),
tlsCertsNotAfterTimestampGauge: influxDB2Store.NewGauge(influxDBTLSCertsNotAfterTimestampName),
}

View file

@ -47,15 +47,11 @@ func TestInfluxDB2(t *testing.T) {
expectedServer := []string{
`(traefik\.config\.reload\.total count=1) [\d]{19}`,
`(traefik\.config\.reload\.total\.failure count=1) [\d]{19}`,
`(traefik\.config\.reload\.lastSuccessTimestamp value=1) [\d]{19}`,
`(traefik\.config\.reload\.lastFailureTimestamp value=1) [\d]{19}`,
}
influxDB2Registry.ConfigReloadsCounter().Add(1)
influxDB2Registry.ConfigReloadsFailureCounter().Add(1)
influxDB2Registry.LastConfigReloadSuccessGauge().Set(1)
influxDB2Registry.LastConfigReloadFailureGauge().Set(1)
msgServer := <-c
assertMessage(t, *msgServer, expectedServer)

View file

@ -22,9 +22,7 @@ type Registry interface {
// server metrics
ConfigReloadsCounter() metrics.Counter
ConfigReloadsFailureCounter() metrics.Counter
LastConfigReloadSuccessGauge() metrics.Gauge
LastConfigReloadFailureGauge() metrics.Gauge
// TLS
@ -71,9 +69,7 @@ func NewVoidRegistry() Registry {
// This allows for feature disparity between the different metric implementations.
func NewMultiRegistry(registries []Registry) Registry {
var configReloadsCounter []metrics.Counter
var configReloadsFailureCounter []metrics.Counter
var lastConfigReloadSuccessGauge []metrics.Gauge
var lastConfigReloadFailureGauge []metrics.Gauge
var tlsCertsNotAfterTimestampGauge []metrics.Gauge
var entryPointReqsCounter []metrics.Counter
var entryPointReqsTLSCounter []metrics.Counter
@ -100,15 +96,9 @@ func NewMultiRegistry(registries []Registry) Registry {
if r.ConfigReloadsCounter() != nil {
configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
}
if r.ConfigReloadsFailureCounter() != nil {
configReloadsFailureCounter = append(configReloadsFailureCounter, r.ConfigReloadsFailureCounter())
}
if r.LastConfigReloadSuccessGauge() != nil {
lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
}
if r.LastConfigReloadFailureGauge() != nil {
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
}
if r.TLSCertsNotAfterTimestampGauge() != nil {
tlsCertsNotAfterTimestampGauge = append(tlsCertsNotAfterTimestampGauge, r.TLSCertsNotAfterTimestampGauge())
}
@ -179,9 +169,7 @@ func NewMultiRegistry(registries []Registry) Registry {
svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceOpenConnsGauge) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0,
routerEnabled: len(routerReqsCounter) > 0 || len(routerReqDurationHistogram) > 0 || len(routerOpenConnsGauge) > 0,
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
tlsCertsNotAfterTimestampGauge: multi.NewGauge(tlsCertsNotAfterTimestampGauge...),
entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...),
entryPointReqsTLSCounter: multi.NewCounter(entryPointReqsTLSCounter...),
@ -211,9 +199,7 @@ type standardRegistry struct {
routerEnabled bool
svcEnabled bool
configReloadsCounter metrics.Counter
configReloadsFailureCounter metrics.Counter
lastConfigReloadSuccessGauge metrics.Gauge
lastConfigReloadFailureGauge metrics.Gauge
tlsCertsNotAfterTimestampGauge metrics.Gauge
entryPointReqsCounter metrics.Counter
entryPointReqsTLSCounter metrics.Counter
@ -253,18 +239,10 @@ func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
return r.configReloadsCounter
}
func (r *standardRegistry) ConfigReloadsFailureCounter() metrics.Counter {
return r.configReloadsFailureCounter
}
func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
return r.lastConfigReloadSuccessGauge
}
func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
return r.lastConfigReloadFailureGauge
}
func (r *standardRegistry) TLSCertsNotAfterTimestampGauge() metrics.Gauge {
return r.tlsCertsNotAfterTimestampGauge
}

View file

@ -57,9 +57,7 @@ func RegisterOpenTelemetry(ctx context.Context, config *types.OpenTelemetry) Reg
routerEnabled: config.AddRoutersLabels,
svcEnabled: config.AddServicesLabels,
configReloadsCounter: newOTLPCounterFrom(meter, configReloadsTotalName, "Config reloads"),
configReloadsFailureCounter: newOTLPCounterFrom(meter, configReloadsFailuresTotalName, "Config reload failures"),
lastConfigReloadSuccessGauge: newOTLPGaugeFrom(meter, configLastReloadSuccessName, "Last config reload success", unit.Milliseconds),
lastConfigReloadFailureGauge: newOTLPGaugeFrom(meter, configLastReloadFailureName, "Last config reload failure", unit.Milliseconds),
tlsCertsNotAfterTimestampGauge: newOTLPGaugeFrom(meter, tlsCertsNotAfterTimestamp, "Certificate expiration timestamp", unit.Milliseconds),
}

View file

@ -340,15 +340,11 @@ func TestOpenTelemetry(t *testing.T) {
// TODO: the len of startUnixNano is no supposed to be 20, it should be 19
expected = append(expected,
`({"name":"traefik_config_reloads_total","description":"Config reloads","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
`({"name":"traefik_config_reloads_failure_total","description":"Config reload failures","unit":"1","sum":{"dataPoints":\[{"startTimeUnixNano":"[\d]{19}","timeUnixNano":"[\d]{19}","asDouble":1}\],"aggregationTemporality":2,"isMonotonic":true}})`,
`({"name":"traefik_config_last_reload_success","description":"Last config reload success","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
`({"name":"traefik_config_last_reload_failure","description":"Last config reload failure","unit":"ms","gauge":{"dataPoints":\[{"startTimeUnixNano":"[\d]{20}","timeUnixNano":"[\d]{19}","asDouble":1}\]}})`,
)
registry.ConfigReloadsCounter().Add(1)
registry.ConfigReloadsFailureCounter().Add(1)
registry.LastConfigReloadSuccessGauge().Set(1)
registry.LastConfigReloadFailureGauge().Set(1)
msgServer := <-c
assertMessage(t, *msgServer, expected)

View file

@ -21,11 +21,9 @@ const (
MetricNamePrefix = "traefik_"
// server meta information.
metricConfigPrefix = MetricNamePrefix + "config_"
configReloadsTotalName = metricConfigPrefix + "reloads_total"
configReloadsFailuresTotalName = metricConfigPrefix + "reloads_failure_total"
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
configLastReloadFailureName = metricConfigPrefix + "last_reload_failure"
metricConfigPrefix = MetricNamePrefix + "config_"
configReloadsTotalName = metricConfigPrefix + "reloads_total"
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
// TLS.
metricsTLSPrefix = MetricNamePrefix + "tls_"
@ -118,18 +116,10 @@ func initStandardRegistry(config *types.Prometheus) Registry {
Name: configReloadsTotalName,
Help: "Config reloads",
}, []string{})
configReloadsFailures := newCounterFrom(stdprometheus.CounterOpts{
Name: configReloadsFailuresTotalName,
Help: "Config failure reloads",
}, []string{})
lastConfigReloadSuccess := newGaugeFrom(stdprometheus.GaugeOpts{
Name: configLastReloadSuccessName,
Help: "Last config reload success",
}, []string{})
lastConfigReloadFailure := newGaugeFrom(stdprometheus.GaugeOpts{
Name: configLastReloadFailureName,
Help: "Last config reload failure",
}, []string{})
tlsCertsNotAfterTimestamp := newGaugeFrom(stdprometheus.GaugeOpts{
Name: tlsCertsNotAfterTimestamp,
Help: "Certificate expiration timestamp",
@ -137,9 +127,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
promState.vectors = []vector{
configReloads.cv,
configReloadsFailures.cv,
lastConfigReloadSuccess.gv,
lastConfigReloadFailure.gv,
tlsCertsNotAfterTimestamp.gv,
}
@ -148,9 +136,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
routerEnabled: config.AddRoutersLabels,
svcEnabled: config.AddServicesLabels,
configReloadsCounter: configReloads,
configReloadsFailureCounter: configReloadsFailures,
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
lastConfigReloadFailureGauge: lastConfigReloadFailure,
tlsCertsNotAfterTimestampGauge: tlsCertsNotAfterTimestamp,
}

View file

@ -100,9 +100,7 @@ func TestPrometheus(t *testing.T) {
}
prometheusRegistry.ConfigReloadsCounter().Add(1)
prometheusRegistry.ConfigReloadsFailureCounter().Add(1)
prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
prometheusRegistry.
TLSCertsNotAfterTimestampGauge().
@ -201,18 +199,10 @@ func TestPrometheus(t *testing.T) {
name: configReloadsTotalName,
assert: buildCounterAssert(t, configReloadsTotalName, 1),
},
{
name: configReloadsFailuresTotalName,
assert: buildCounterAssert(t, configReloadsFailuresTotalName, 1),
},
{
name: configLastReloadSuccessName,
assert: buildTimestampAssert(t, configLastReloadSuccessName),
},
{
name: configLastReloadFailureName,
assert: buildTimestampAssert(t, configLastReloadFailureName),
},
{
name: tlsCertsNotAfterTimestamp,
labels: map[string]string{

View file

@ -18,9 +18,7 @@ var (
const (
statsdConfigReloadsName = "config.reload.total"
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
statsdTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
@ -63,9 +61,7 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
registry := &standardRegistry{
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
tlsCertsNotAfterTimestampGauge: statsdClient.NewGauge(statsdTLSCertsNotAfterTimestampName),
}

View file

@ -49,9 +49,7 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) {
expected := []string{
metricsPrefix + ".config.reload.total:1.000000|c\n",
metricsPrefix + ".config.reload.total.failure:1.000000|c\n",
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
metricsPrefix + ".config.reload.lastFailureTimestamp:1.000000|g\n",
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g\n",
@ -81,9 +79,7 @@ func testRegistry(t *testing.T, metricsPrefix string, registry Registry) {
udp.ShouldReceiveAll(t, expected, func() {
registry.ConfigReloadsCounter().Add(1)
registry.ConfigReloadsFailureCounter().Add(1)
registry.LastConfigReloadSuccessGauge().Set(1)
registry.LastConfigReloadFailureGauge().Set(1)
registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)