Skip to content

Commit

Permalink
Merge pull request statsd#162 from Dieterbe/histogram
Browse files Browse the repository at this point in the history
support histograms
  • Loading branch information
mrtazz committed Feb 19, 2013
2 parents 9ca9e77 + 2d256e2 commit 817738f
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 5 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,34 @@ generate the following list of stats for each threshold:
Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is
the percentile threshold.

Use the `config.histogram` setting to instruct statsd to maintain histograms
over time. Specify which metrics to match and, for each, a corresponding
ordered list of non-inclusive upper limits of bins (class intervals).
Use `inf` to denote infinity; a lower limit of 0 is assumed.
Each `flushInterval`, statsd will store how many values (absolute frequency)
fall within each bin (class interval), for all matching metrics.
Examples:

* no histograms for any timer (default): `[]`
* histogram to only track render durations,
with unequal class intervals and catchall for outliers:

[ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ]

* histogram for all timers except 'foo' related,
with equal class interval and catchall for outliers:

[ { metric: 'foo', bins: [] },
{ metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ]

Note:

* first match for a metric wins.
* bin upper limits may contain decimals.
* this is actually more powerful than what's strictly considered
histograms, as you can make each bin arbitrarily wide,
i.e. class intervals of different sizes.

Gauges
------
StatsD now also supports gauges, arbitrary values, which can be recorded.
Expand Down
13 changes: 12 additions & 1 deletion backends/graphite.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,21 @@ var flush_stats = function graphite_flush(ts, metrics) {

for (key in timer_data) {
if (Object.keys(timer_data).length > 0) {
var namespace = timerNamespace.concat(key);
var the_key = namespace.join(".");
for (timer_data_key in timer_data[key]) {
var namespace = timerNamespace.concat(key);
var the_key = namespace.join(".");
statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix;

if (typeof(timer_data[key][timer_data_key]) === 'number') {
statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix;
} else {
for (timer_data_sub_key in timer_data[key][timer_data_key]) {
l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString());
statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' +
timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix;
}
}
}

numStats += 1;
Expand Down
16 changes: 16 additions & 0 deletions exampleConfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,22 @@ Optional Variables:
repeaterProtocol: whether to use udp4 or udp6 for repeaters.
["udp4" or "udp6", default: "udp4"]
histogram: for timers, an array of mappings of strings (to match metrics) and
corresponding ordered non-inclusive upper limits of bins.
For all matching metrics, histograms are maintained over
time by writing the frequencies for all bins.
'inf' means infinity. A lower limit of 0 is assumed.
default: [], meaning no histograms for any timer.
First match wins. examples:
* histogram to only track render durations, with unequal
class intervals and catchall for outliers:
[ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ]
* histogram for all timers except 'foo' related,
equal class interval and catchall for outliers:
[ { metric: 'foo', bins: [] },
{ metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ]
*/
{
graphitePort: 2003
Expand Down
26 changes: 26 additions & 0 deletions lib/process_metrics.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) {
var timers = metrics.timers;
var timer_counters = metrics.timer_counters;
var pctThreshold = metrics.pctThreshold;
var histogram = metrics.histogram;

for (key in counters) {
var value = counters[key];
Expand Down Expand Up @@ -72,6 +73,31 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) {
current_timer_data["sum"] = sum;
current_timer_data["mean"] = mean;

// note: values bigger than the upper limit of the last bin are ignored, by design
conf = histogram || [];
bins = [];
for (var i = 0; i < conf.length; i++) {
if (key.indexOf(conf[i].metric) > -1) {
bins = conf[i].bins;
break;
}
}
if(bins.length) {
current_timer_data['histogram'] = {};
}
// the outer loop iterates bins, the inner loop iterates timer values;
// each run of the inner loop should only consider the timer values that fall within the current bin.
// because the values are already sorted, the index `i` carries over from one bin to the next, so the entire values range is traversed only once in total
var i = 0;
for (var bin_i = 0; bin_i < bins.length; bin_i++) {
var freq = 0;
for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) {
freq += 1;
}
bin_name = 'bin_' + bins[bin_i];
current_timer_data['histogram'][bin_name] = freq;
}

timer_data[key] = current_timer_data;

}
Expand Down
5 changes: 3 additions & 2 deletions stats.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ function flushMetrics() {
sets: sets,
counter_rates: counter_rates,
timer_data: timer_data,
pctThreshold: pctThreshold
pctThreshold: pctThreshold,
histogram: conf.histogram
}

// After all listeners, reset the stats
Expand Down Expand Up @@ -124,7 +125,7 @@ config.configFile(process.argv[2], function (config, oldConfig) {
bad_lines_seen = prefixStats + ".bad_lines_seen";
packets_received = prefixStats + ".packets_received";

//now set to zero so we can increment them
//now set to zero so we can increment them
counters[bad_lines_seen] = 0;
counters[packets_received] = 0;

Expand Down
8 changes: 7 additions & 1 deletion test/graphite_tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ module.exports = {
, batch: 200 \n\
, flushInterval: " + this.myflush + " \n\
, percentThreshold: 90\n\
, histogram: [ { metric: \"a_test_value\", bins: [1000] } ]\n\
, port: 8125\n\
, dumpMessages: false \n\
, debug: false\n\
Expand Down Expand Up @@ -190,7 +191,7 @@ module.exports = {
},

timers_are_valid: function (test) {
test.expect(5);
test.expect(6);

var testvalue = 100;
var me = this;
Expand All @@ -214,6 +215,11 @@ module.exports = {
var mykey = 'stats.timers.a_test_value.mean_90';
return _.include(_.keys(post),mykey) && (post[mykey] == testvalue);
};
// Predicate over one parsed stats hash: true when the histogram bin key for
// the test timer is present and its value equals 1.
var testtimerhistogramvalue_test = function(post){
  var mykey = 'stats.timers.a_test_value.histogram.bin_1000';
  // NOTE(review): loose equality kept as-is — presumably values may arrive as strings; confirm against the hash parser
  return _.include(_.keys(post),mykey) && (post[mykey] == 1);
};
// The failure message names the metric actually being checked (the histogram bin, not the mean).
test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.histogram.bin_1000 should be ' + 1);
test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue);

var count_test = function(post, metric){
Expand Down
41 changes: 40 additions & 1 deletion test/process_metrics_tests.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
var pm = require('../lib/process_metrics')
var pm = require('../lib/process_metrics'),
_ = require('underscore');

module.exports = {
setUp: function (callback) {
Expand Down Expand Up @@ -143,6 +144,44 @@ module.exports = {
test.equal(150, timer_data.mean_80);
test.equal(200, timer_data.upper_80);
test.equal(300, timer_data.sum_80);
test.done();
}, // check if the correct settings are being applied. as well as actual counts
timers_histogram: function (test) {
test.expect(13);
this.metrics.timers['a'] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
this.metrics.timers['abc'] = [0.1234, 2.89, 4, 6, 8];
this.metrics.timers['foo'] = [0, 2, 4, 6, 8];
this.metrics.timers['barbazfoobar'] = [0, 2, 4, 6, 8];
this.metrics.timers['bar.bazfoobar.abc'] = [0, 2, 4, 6, 8];
this.metrics.timers['xyz'] = [0, 2, 4, 6, 8];
this.metrics.histogram = [ { metric: 'foo', bins: [] },
{ metric: 'abcd', bins: [ 1, 5, 'inf'] },
{ metric: 'abc', bins: [ 1, 2.21, 'inf'] },
{ metric: 'a', bins: [ 1, 2] } ];
pm.process_metrics(this.metrics, 100, this.time_stamp, function(){});
timer_data = this.metrics.timer_data;
// nothing matches the 'abcd' config, so nothing has bin_5
test.equal(undefined, timer_data['a']['histogram']['bin_5']);
test.equal(undefined, timer_data['abc']['histogram']['bin_5']);

// check that 'a' got the right config and numbers
test.equal(0, timer_data['a']['histogram']['bin_1']);
test.equal(1, timer_data['a']['histogram']['bin_2']);
test.equal(undefined, timer_data['a']['histogram']['bin_inf']);

// only 'abc' should have a bin_inf; also check all its counts,
// and make sure it has no other bins
test.equal(1, timer_data['abc']['histogram']['bin_1']);
test.equal(0, timer_data['abc']['histogram']['bin_2.21']);
test.equal(4, timer_data['abc']['histogram']['bin_inf']);
test.equal(3, _.size(timer_data['abc']['histogram']));

// these all have histograms disabled ('foo' explicitly, rest implicitly)
test.equal(undefined, timer_data['foo']['histogram']);
test.equal(undefined, timer_data['barbazfoobar']['histogram']);
test.equal(undefined, timer_data['bar.bazfoobar.abc']['histogram']);
test.equal(undefined, timer_data['xyz']['histogram']);

test.done();
},
statsd_metrics_exist: function(test) {
Expand Down

0 comments on commit 817738f

Please sign in to comment.