-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tracing Service #53
Open
oktal
wants to merge
5
commits into
master
Choose a base branch
from
syslog-tracing
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Tracing Service #53
Changes from 1 commit
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
54ce843
Statistics are now computed from traces. Also exposed a REST API to c…
e1ab43c
Tweaked REST API. Added a simple client to query the REST API
f407d7c
Minor polishing and cleaner code
43dd406
Merge branch 'master' into syslog-tracing
10c8234
Correct assertion
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next
Next commit
Statistics are now computed from traces. Also exposed a REST API to c…
…onsume raw statistics
- Loading branch information
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,11 +4,6 @@ | |
Utility to collect RTBKit traces from syslog | ||
*/ | ||
|
||
#include <sys/stat.h> | ||
#include <fcntl.h> | ||
#include <unistd.h> | ||
#include <cstdio> | ||
#include <cstdlib> | ||
#include <array> | ||
|
||
#include <iostream> | ||
|
@@ -18,65 +13,106 @@ | |
|
||
#include "soa/jsoncpp/json.h" | ||
#include "soa/service/nprobe.h" | ||
#include "soa/service/service_base.h" | ||
#include "soa/service/rest_service_endpoint.h" | ||
#include "soa/service/rest_request_router.h" | ||
#include "soa/service/rest_request_binding.h" | ||
|
||
#include <boost/program_options/cmdline.hpp> | ||
#include <boost/program_options/options_description.hpp> | ||
#include <boost/program_options/positional_options.hpp> | ||
#include <boost/program_options/parsers.hpp> | ||
#include <boost/program_options/variables_map.hpp> | ||
|
||
#include "soa/service/service_utils.h" | ||
|
||
namespace { | ||
|
||
constexpr size_t MaxEntries = 1 << 3; | ||
constexpr size_t MaxEntries = 1 << 4; | ||
|
||
} | ||
|
||
struct App { | ||
static_assert(!(MaxEntries & 1), "MaxEntries must be 2^M"); | ||
|
||
using namespace Datacratic; | ||
|
||
struct TracingRestEndpoint : public ServiceBase, public RestServiceEndpoint { | ||
|
||
struct TraceEntry { | ||
int64_t tid; | ||
std::string hostname; | ||
int64_t id; | ||
int64_t parent_id; | ||
std::string tag; | ||
std::string uniq; | ||
int64_t freq; | ||
int64_t pid; | ||
struct Context { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The span and context structs should be shared with the nprobe class, and it should also include the serialization and deserialization mechanism. |
||
std::string kind; | ||
int64_t freq; | ||
std::string uniq; | ||
} context; | ||
|
||
struct Span { | ||
int64_t tid; | ||
int64_t id; | ||
int64_t parent_id; | ||
std::string tag; | ||
int64_t pid; | ||
|
||
std::chrono::nanoseconds t1; | ||
std::chrono::nanoseconds t2; | ||
std::chrono::nanoseconds start; | ||
std::chrono::nanoseconds end; | ||
} span; | ||
|
||
std::string hostname; | ||
|
||
static TraceEntry fromJson(const Json::Value &root) { | ||
TraceEntry entry; | ||
try { | ||
const auto tid = root["tid"].asInt(); | ||
const auto hostname = root["host"].asString(); | ||
|
||
const auto id = root["id"].asInt(); | ||
const auto parent_id = root["pid"].asInt(); | ||
const auto kind = root["kind"].asString(); | ||
const auto tag = root["tag"].asString(); | ||
const auto uniq = root["uniq"].asString(); | ||
const int freq = root["freq"].asInt(); | ||
const auto pid = root["kpid"].asInt(); | ||
|
||
const auto t1 = std::chrono::nanoseconds { root["t1"].asInt() }; | ||
const auto t2 = std::chrono::nanoseconds { root["t2"].asInt() }; | ||
const auto start = std::chrono::nanoseconds { root["t1"].asInt() }; | ||
const auto end = std::chrono::nanoseconds { root["t2"].asInt() }; | ||
|
||
entry.context = { kind, freq, uniq }; | ||
entry.span = { tid, id, parent_id, tag, pid, start, end }; | ||
entry.hostname = hostname; | ||
|
||
return TraceEntry { tid, hostname, id, parent_id, tag, | ||
uniq, freq, pid, t1, t2 }; | ||
} catch (const std::runtime_error &e) { | ||
} | ||
|
||
return TraceEntry { }; | ||
return entry; | ||
} | ||
|
||
std::string print() const { | ||
std::ostringstream oss; | ||
oss << "TraceEntry { "; | ||
oss << "tid=" << tid << ", hostname=" << hostname | ||
<< ", id=" << id << ", parent_id=" << parent_id | ||
<< ", tag=" << tag << ", uniq=" << uniq | ||
<< ", freq=" << freq << ", pid=" << pid | ||
<< ", t1=" << t1.count() << ", t2=" << t2.count() | ||
oss << " span { " | ||
<< "tid = " << span.tid | ||
<< ", id = " << span.id << ", parent_id = " << span.parent_id | ||
<< ", tag = " << span.tag << ", pid = " << span.pid << " }" | ||
<< " context { " | ||
<< "kind = " << context.kind << ", uniq = " << context.uniq | ||
<< ", freq = " << context.freq << " }" | ||
<< " }"; | ||
return oss.str(); | ||
} | ||
}; | ||
|
||
int exec(const std::string &fifoPath) { | ||
TracingRestEndpoint( | ||
const std::shared_ptr<ServiceProxies> &proxies, | ||
const std::string &name = "tracing.rest-endpoint" | ||
) | ||
: ServiceBase(name, proxies), RestServiceEndpoint(getServices()->zmqContext) | ||
, index { 0 } | ||
{ | ||
init(getServices()->config, serviceName()); | ||
installRoutes(); | ||
} | ||
|
||
|
||
int run(const std::string &fifoPath) { | ||
int fd = open(fifoPath.c_str(), O_RDONLY); | ||
|
||
if (fd == -1) { | ||
|
@@ -93,7 +129,7 @@ struct App { | |
::perror("read"); | ||
return 1; | ||
} | ||
|
||
if (c == '}') { | ||
message += c; | ||
if (!handleMessage(message)) | ||
|
@@ -107,14 +143,134 @@ struct App { | |
if (inMessage) { | ||
message += c; | ||
} | ||
|
||
} | ||
} | ||
|
||
private: | ||
std::array<TraceEntry, MaxEntries> entries; | ||
uint64_t index; | ||
|
||
RestRequestRouter restRouter; | ||
|
||
struct StatsEntry { | ||
StatsEntry(const std::string &tag, const std::vector<TraceEntry> &serie) | ||
: tag { tag } | ||
, centile { 0.0 } | ||
, mean { 0.0 } | ||
, median { 0.0 } | ||
, serie_ { serie } | ||
{ | ||
compute(); | ||
} | ||
|
||
StatsEntry(StatsEntry &&other) = default; | ||
StatsEntry(const StatsEntry &other) = default; | ||
|
||
void compute() | ||
{ | ||
if (serie_.empty()) | ||
return; | ||
|
||
sort(begin(serie_), end(serie_), | ||
[this](const TraceEntry &lhs, const TraceEntry &rhs) { | ||
const auto duration_lhs = duration(lhs); | ||
const auto duration_rhs = duration(rhs); | ||
|
||
return duration_lhs < duration_rhs; | ||
}); | ||
|
||
const auto size = serie_.size(); | ||
const auto rank_99 = int { round(0.99 * (size - 1)) }; | ||
const auto &entry_99 = serie_[rank_99]; | ||
centile = duration(entry_99); | ||
|
||
auto acc = [this](double current, const TraceEntry &entry) | ||
{ | ||
return current + duration(entry); | ||
}; | ||
|
||
mean = accumulate(begin(serie_), end(serie_), 0.0, acc) / serie_.size(); | ||
|
||
const auto medianIndex = size / 2; | ||
if (medianIndex % 2 == 0) | ||
median = duration(serie_[medianIndex]); | ||
else | ||
median = (duration(serie_[medianIndex]) + | ||
duration(serie_[medianIndex - 1])) / 2.0; | ||
|
||
} | ||
|
||
std::string tag; | ||
double centile; | ||
double mean; | ||
double median; | ||
|
||
Json::Value toJson() const { | ||
Json::Value value; | ||
value["centile"] = centile; | ||
value["mean"] = mean; | ||
value["median"] = median; | ||
return value; | ||
} | ||
|
||
private: | ||
double duration(const TraceEntry &entry) { | ||
return std::chrono::duration_cast<std::chrono::milliseconds>( | ||
entry.span.end - entry.span.start).count(); | ||
} | ||
std::vector<TraceEntry> serie_; | ||
}; | ||
|
||
struct ObjectStats { | ||
ObjectStats(const std::string &kind) | ||
: kind { kind } | ||
{ } | ||
|
||
void addEntry(StatsEntry entry) { | ||
values.push_back(std::move(entry)); | ||
} | ||
|
||
Json::Value toJson() const { | ||
Json::Value value; | ||
std::for_each(begin(values), end(values), [&](const StatsEntry &entry) { | ||
value[entry.tag] = entry.toJson(); | ||
}); | ||
|
||
return value; | ||
} | ||
|
||
|
||
std::string kind; | ||
private: | ||
std::vector<StatsEntry> values; | ||
}; | ||
|
||
struct GlobalStats : public std::vector<ObjectStats> { | ||
Json::Value toJson() const { | ||
Json::Value root; | ||
std::for_each(begin(), end(), [&](const ObjectStats &stats) { | ||
root[stats.kind] = stats.toJson(); | ||
}); | ||
|
||
return root; | ||
} | ||
}; | ||
|
||
void installRoutes() { | ||
auto &v0Router = restRouter.addSubRouter( | ||
"/v0", "Simple tracing REST interface"); | ||
|
||
addRouteSyncReturn( | ||
v0Router, "/stats", { "GET" }, | ||
"Returns raw statistics", | ||
"Accumulated statistics from collected traces", | ||
[](const GlobalStats &stats) { return stats.toJson(); }, | ||
&TracingRestEndpoint::computeStats, | ||
this | ||
); | ||
} | ||
|
||
|
||
bool handleMessage(const std::string &message) { | ||
|
||
Json::Value root; | ||
|
@@ -125,27 +281,73 @@ struct App { | |
} | ||
|
||
auto entry = TraceEntry::fromJson(root); | ||
#if 1 | ||
std::cout << entry.print() << std::endl; | ||
#endif | ||
entries[index] = std::move(entry); | ||
|
||
entries[index & (MaxEntries - 1)] = std::move(entry); | ||
++index; | ||
index = (index + 1) & (MaxEntries - 1); | ||
|
||
return true; | ||
} | ||
}; | ||
|
||
void usage() { | ||
std::cout << "usage: syslog_aggregator fifo-path" << std::endl; | ||
} | ||
GlobalStats computeStats() const { | ||
typedef std::map<std::string, std::vector<TraceEntry>> TracingData; | ||
/* Maps object type (kind) to tracing data */ | ||
std::map<std::string, TracingData> data; | ||
|
||
int main(int argc, const char *argv[]) { | ||
if (argc == 1) { | ||
usage(); | ||
return 0; | ||
std::for_each(begin(entries), end(entries), [&](const TraceEntry &entry) { | ||
auto &tracingData = data[entry.context.kind]; | ||
auto &vec = tracingData[entry.span.tag]; | ||
vec.push_back(entry); | ||
}); | ||
|
||
GlobalStats stats; | ||
|
||
for (const auto &kind: data) { | ||
auto tags = kind.second; | ||
|
||
ObjectStats objStats(kind.first); | ||
|
||
for (const auto &tag: tags) { | ||
objStats.addEntry(StatsEntry(tag.first, tag.second)); | ||
} | ||
|
||
stats.push_back(objStats); | ||
|
||
} | ||
|
||
return stats; | ||
|
||
} | ||
|
||
void handleRequest(const ConnectionId &conn, const RestRequest &request) const | ||
{ | ||
restRouter.handleRequest(conn, request); | ||
} | ||
}; | ||
|
||
int main(int argc, const char *argv[]) { | ||
|
||
ServiceProxyArguments serviceArgs; | ||
|
||
std::string fifoPath; | ||
namespace po = boost::program_options; | ||
|
||
po::options_description options; | ||
options | ||
.add(serviceArgs.makeProgramOptions()); | ||
|
||
options.add_options() | ||
("path,p", po::value<std::string>(&fifoPath), | ||
"path of the fifo where logs are stored"); | ||
|
||
po::variables_map vm; | ||
po::store(po::command_line_parser(argc, argv).options(options).run(), vm); | ||
po::notify(vm); | ||
|
||
auto proxies = serviceArgs.makeServiceProxies(); | ||
auto serviceName = serviceArgs.serviceName("tracing"); | ||
TracingRestEndpoint endpoint(proxies, serviceName); | ||
endpoint.bindFixedHttpAddress("localhost", 3481); | ||
endpoint.start(); | ||
endpoint.run(fifoPath); | ||
|
||
App app; | ||
return app.exec(argv[1]); | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That only checks that the number is even. To check for 2^M, use this instead:
https://github.com/RAttab/lockless/blob/master/src/utils.h#L30
This checks that there's only one bit set, which means that the value is a power of 2 that's greater than 0.