Skip to content

Commit

Permalink
ceph_telemetry: accept older formats and transform
Browse files Browse the repository at this point in the history
Turn crashes from a dict into a list, add a datestamp
(either from the report or from the time of receipt) to the
Elasticsearch document ID, and remove the hostname from crashdumps.

Also add test code and requirements.txt for the tests for
ease of virtualenv creation

Signed-off-by: Dan Mick <[email protected]>
  • Loading branch information
dmick committed Apr 29, 2019
1 parent 8a4a6ab commit 0222d0d
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 3 deletions.
4 changes: 4 additions & 0 deletions src/telemetry/server/ceph_telemetry/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
elasticsearch
requests
flask
flask_restful
76 changes: 73 additions & 3 deletions src/telemetry/server/ceph_telemetry/rest/report.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,82 @@
from flask import request, jsonify
from flask_restful import Resource
from elasticsearch import Elasticsearch
import datetime


class Report(Resource):
    '''
    REST resource that accepts a telemetry report, normalizes older
    report formats, and indexes the result into Elasticsearch.

    The report being processed is held in self.report; the helper
    methods all modify it in-place.
    '''

    def __init__(self, report=None):
        '''
        report: optional already-parsed report dict. put() replaces
        it with the request body; passing it here lets the helpers
        be exercised without a request context.
        '''
        super(Report, self).__init__()
        self.report = report

    def _dots_to_percent(self, obj=None):
        '''
        Key names cannot contain '.' in Elasticsearch, so change
        them to '%', first escaping any existing '%' to '%%'.
        Don't worry about values. Modifies keys in-place.

        Only keys that contain a '.' are rewritten; a key with '%'
        but no '.' is left untouched. Recurses into dict values only,
        so dicts nested inside lists are not visited.
        '''

        # handle first call; others are recursive
        if obj is None:
            obj = self.report

        # Iterate over a snapshot: keys are deleted from and inserted
        # into obj inside the loop, which is not safe to do while
        # iterating the live items() view.
        for k, v in list(obj.items()):
            if isinstance(v, dict):
                self._dots_to_percent(v)
            if '.' in k:
                del obj[k]
                newk = k.replace('%', '%%').replace('.', '%')
                obj[newk] = v

    def _crashes_to_list(self):
        '''
        Early versions of telemetry sent crashes as a dict, keyed
        by crash_id. This isn't particularly useful, so if we see it,
        change to the current convention of "a list of crash dicts",
        which contains the crash_id. Modifies report in-place.

        Does nothing if there is no 'crashes' entry or if it is not
        a dict.
        '''

        crashes = self.report.get('crashes')
        if isinstance(crashes, dict):
            self.report['crashes'] = list(crashes.values())

    def _report_id(self):
        '''
        Make a unique Elasticsearch document ID. Earlier versions
        of telemetry did not contain a report_timestamp, so use
        time-of-receipt if not present.

        Returns '<report_id>.<timestamp>'. Raises KeyError if the
        report has no 'report_id'.
        '''

        if 'report_timestamp' in self.report:
            timestamp = self.report['report_timestamp']
        else:
            timestamp = datetime.datetime.utcnow().isoformat()

        return '.'.join((self.report['report_id'], timestamp))

    def _purge_hostname_from_crash(self):
        '''
        hostname can be FQDN and undesirable to make public.
        Remove from crashdump data (newer telemetry modules don't
        submit it at all). Modifies report in-place.
        '''

        if 'crashes' not in self.report:
            return

        # Normalize the old dict-of-crashes format first so that the
        # loop below only ever has to deal with a list.
        if not isinstance(self.report['crashes'], list):
            self._crashes_to_list()

        for crash in self.report['crashes']:
            if isinstance(crash, dict):
                crash.pop('utsname_hostname', None)

    def put(self):
        '''
        Handle PUT: parse the JSON body, normalize it, and index it
        into the 'telemetry' index under a timestamp-qualified ID.
        '''
        self.report = request.get_json(force=True)
        self._crashes_to_list()
        self._dots_to_percent()
        self._purge_hostname_from_crash()
        es_id = self._report_id()
        es = Elasticsearch()
        # Index only the sanitized report, and only under the
        # timestamp-qualified ID; the raw request body must not be
        # stored, since it may still carry hostnames and dotted keys.
        es.index(index='telemetry', doc_type='report', id=es_id,
                 body=self.report)
        return jsonify(status=True)
57 changes: 57 additions & 0 deletions src/telemetry/server/ceph_telemetry/test_rest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from copy import deepcopy
from rest.report import Report

# Shared fixture: a report in the older telemetry format. Report's helper
# methods modify the dict they are given in-place, so tests should work on
# a copy of this structure rather than on the module-level object itself.
report_data = {
# Old-style crashes: a dict keyed by crash id instead of a list of crash dicts.
"crashes": {
"crashid1": {"crash_id": "crashreport1"},
"crashid2": {
"os_name": "TheBestOS",
# hostname entry that _purge_hostname_from_crash is expected to remove
"utsname_hostname": "foo.bar.baz.com",
},
},
# Top-level keys containing '.' (and '%') to exercise _dots_to_percent;
# the values also contain dots and must be left untouched.
"key.with.dots": "value.with.dots.and.%",
"key.with.dots.and.%": "value.with.dots.and.%",
# Nested dicts, to exercise the recursive key rewrite.
"key1": {
"key2": {
"key3.with.dots": "value3",
},
},
# Together these two fields form the expected Elasticsearch document ID.
"report_timestamp": "2019-04-25T22:42:59.083915",
"report_id": "cc74d980-51ba-4c29-8534-fa813e759a7c",
}



def test_dots_to_percent():
    '''Keys containing '.' are rewritten (with '%' escaped); values are not.'''
    # Work on a private copy: _dots_to_percent rewrites keys in-place and
    # would otherwise corrupt the shared fixture for the other tests.
    report = Report(deepcopy(report_data))
    report._dots_to_percent()
    assert 'key.with.dots' not in report.report
    assert 'key%with%dots' in report.report
    assert 'key%with%dots%and%%%' in report.report
    assert report.report['key%with%dots'] == 'value.with.dots.and.%'
    assert 'key3%with%dots' in report.report['key1']['key2']


def test_crashes_to_list():
    '''An old-style dict of crashes becomes a list of the crash dicts.'''
    # Work on a private copy: _crashes_to_list replaces report['crashes']
    # in-place and would otherwise change the shared fixture.
    report = Report(deepcopy(report_data))
    report._crashes_to_list()
    crashes = report.report['crashes']
    assert isinstance(crashes, list)
    assert len(crashes) == 2
    assert {'crash_id': 'crashreport1'} in crashes
    assert {"os_name": "TheBestOS",
            "utsname_hostname": "foo.bar.baz.com"} in crashes


def test_report_id():
    '''The document ID is report_id plus a timestamp, from the report if present.'''
    # Work on a private copy: this test deletes 'report_timestamp', which
    # must not leak into the shared fixture used by the other tests.
    report = Report(deepcopy(report_data))
    assert (report._report_id() ==
            'cc74d980-51ba-4c29-8534-fa813e759a7c.2019-04-25T22:42:59.083915')
    del report.report['report_timestamp']
    # With no timestamp in the report, time-of-receipt is appended instead;
    # its value is not predictable, so only the prefix is checked.
    es_id = report._report_id()
    assert es_id.startswith('cc74d980-51ba-4c29-8534-fa813e759a7c.')


def test_purge_hostname_from_crash():
    '''utsname_hostname is stripped from every crash entry.'''
    # Work on a private copy: the purge converts and edits
    # report['crashes'] in-place.
    report = Report(deepcopy(report_data))
    report._purge_hostname_from_crash()
    crashes = report.report['crashes']
    assert {"os_name": "TheBestOS",
            "utsname_hostname": "foo.bar.baz.com"} not in crashes
    assert {"os_name": "TheBestOS"} in crashes

0 comments on commit 0222d0d

Please sign in to comment.