From 6746bbbaf7fad64abbca8409b51dac0b0ab4607c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 8 Nov 2019 11:34:09 +0200 Subject: [PATCH 1/5] compact: add metric thanos_compactor_iterations_total MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a counter metric called thanos_compactor_iterations_total that is increased by 1 every time an iteration is executed successfully. This is needed for the case where --wait is specified and the Compactor dies; we need to be able to alert on that. One option would be to alert on a restart of the container; however, that is not very flexible - a restart might still be OK as long as the Compactor successfully finishes its job in time, and at the moment it is impossible to know whether it did. Add this metric so that users can add alerts like: ``` rate(thanos_compactor_iterations_total[1d]) == 0 FOR 3d ``` Signed-off-by: Giedrius Statkevičius --- cmd/thanos/compact.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index ba09994913..a390c93972 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -168,10 +168,17 @@ func runCompact( Name: "thanos_compactor_retries_total", Help: "Total number of retries after retriable compactor error", }) + iterations := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "thanos_compactor_iterations_total", + Help: "Total number of iterations that were executed successfully", + }) halted.Set(0) reg.MustRegister(halted) reg.MustRegister(retried) + if wait != false { + reg.MustRegister(iterations) + } downsampleMetrics := newDownsampleMetrics(reg) @@ -313,6 +320,7 @@ func runCompact( return runutil.Repeat(5*time.Minute, ctx.Done(), func() error { err := f() if err == nil { + iterations.Inc() return nil } From 2e79c6a6e71c3bea1d920a35ce86989d5ccce182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 8 Nov 2019 11:38:39 
+0200 Subject: [PATCH 2/5] CHANGELOG: add entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Giedrius Statkevičius --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8f4785457..aa48b3d80e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1573](https://github.com/thanos-io/thanos/pull/1573) `AliYun OSS` object storage, see [documents](docs/storage.md#aliyun-oss) for further information. - [#1680](https://github.com/thanos-io/thanos/pull/1680) Add a new `--http-grace-period` CLI option to components which serve HTTP to set how long to wait until HTTP Server shuts down. - [#1712](https://github.com/thanos-io/thanos/pull/1712) Rename flag on bucket web component from `--listen` to `--http-address` to match other components. +- [#1733](https://github.com/thanos-io/thanos/pull/1733) New metric `thanos_compactor_iterations_total` on Thanos Compactor which shows the number of successful iterations ### Fixed From 8a9a90d7795d7d83727c89a2dbb5ab9d0cd88365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 8 Nov 2019 11:42:07 +0200 Subject: [PATCH 3/5] compact: simplify wait check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Giedrius Statkevičius --- cmd/thanos/compact.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index a390c93972..53d6433e24 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -176,7 +176,7 @@ func runCompact( reg.MustRegister(halted) reg.MustRegister(retried) - if wait != false { + if wait { reg.MustRegister(iterations) } From 742b4515dc71fa351927ec01f66c0ef378e34a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Fri, 8 Nov 2019 23:17:38 +0200 Subject: 
[PATCH 4/5] cmd: thanos: compact: remove wait check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's register the metric no matter what since if it is run as a batch job then this metric does not matter either way. Signed-off-by: Giedrius Statkevičius --- cmd/thanos/compact.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 53d6433e24..2589a251ee 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -176,9 +176,7 @@ func runCompact( reg.MustRegister(halted) reg.MustRegister(retried) - if wait { - reg.MustRegister(iterations) - } + reg.MustRegister(iterations) downsampleMetrics := newDownsampleMetrics(reg) From c8e88c1961583f9f08b384af76c0482021b4bc5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Mon, 11 Nov 2019 17:23:56 +0200 Subject: [PATCH 5/5] CHANGELOG: add period MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a period at the end of an item in the CHANGELOG to keep it uniform. Signed-off-by: Giedrius Statkevičius --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa48b3d80e..dc3262faef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1573](https://github.com/thanos-io/thanos/pull/1573) `AliYun OSS` object storage, see [documents](docs/storage.md#aliyun-oss) for further information. - [#1680](https://github.com/thanos-io/thanos/pull/1680) Add a new `--http-grace-period` CLI option to components which serve HTTP to set how long to wait until HTTP Server shuts down. - [#1712](https://github.com/thanos-io/thanos/pull/1712) Rename flag on bucket web component from `--listen` to `--http-address` to match other components. 
-- [#1733](https://github.com/thanos-io/thanos/pull/1733) New metric `thanos_compactor_iterations_total` on Thanos Compactor which shows the number of successful iterations +- [#1733](https://github.com/thanos-io/thanos/pull/1733) New metric `thanos_compactor_iterations_total` on Thanos Compactor which shows the number of successful iterations. ### Fixed