Skip to content

Commit

Permalink
Better metrics and error reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
castrapel committed Apr 25, 2019
1 parent 272285f commit 2bc604e
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 16 deletions.
50 changes: 42 additions & 8 deletions lemur/plugins/lemur_acme/dyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dns.name
import dns.query
import dns.resolver
from dyn.tm.errors import DynectCreateError, DynectGetError
from dyn.tm.errors import DynectCreateError, DynectDeleteError, DynectGetError, DynectUpdateError
from dyn.tm.session import DynectSession
from dyn.tm.zones import Node, Zone, get_all_zones
from flask import current_app
Expand Down Expand Up @@ -51,17 +51,23 @@ def _has_dns_propagated(name, token):

def wait_for_dns_change(change_id, account_number=None):
fqdn, token = change_id
number_of_attempts = 10
number_of_attempts = 20
for attempts in range(0, number_of_attempts):
status = _has_dns_propagated(fqdn, token)
current_app.logger.debug("Record status for fqdn: {}: {}".format(fqdn, status))
if status:
metrics.send('wait_for_dns_change_success', 'counter', 1)
break
time.sleep(20)
time.sleep(10)
if not status:
# TODO: Delete associated DNS text record here
metrics.send('wait_for_dns_change_fail', 'counter', 1)
sentry.captureException(
extra={
"fqdn": fqdn, "txt_record": token}
)
metrics.send('wait_for_dns_change_error', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': token})
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
return

Expand Down Expand Up @@ -105,7 +111,7 @@ def create_txt_record(domain, token, account_number):
zone.add_record(node_name, record_type='TXT', txtdata="\"{}\"".format(token), ttl=5)
zone.publish()
current_app.logger.debug("TXT record created: {0}, token: {1}".format(fqdn, token))
except DynectCreateError as e:
except (DynectCreateError, DynectUpdateError) as e:
if "Cannot duplicate existing record data" in e.message:
current_app.logger.debug(
"Unable to add record. Domain: {}. Token: {}. "
Expand Down Expand Up @@ -138,14 +144,33 @@ def delete_txt_record(change_id, account_number, domain, token):
all_txt_records = node.get_all_records_by_type('TXT')
except DynectGetError:
sentry.captureException()
metrics.send('delete_txt_record_error', 'counter', 1)
metrics.send('delete_txt_record_geterror', 'counter', 1)
# No Text Records remain or host is not in the zone anymore because all records have been deleted.
return
for txt_record in all_txt_records:
if txt_record.txtdata == ("{}".format(token)):
current_app.logger.debug("Deleting TXT record name: {0}".format(fqdn))
txt_record.delete()
zone.publish()
try:
txt_record.delete()
except DynectDeleteError:
sentry.captureException(
extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
"txt_record": txt_record.txtdata}
)
metrics.send('delete_txt_record_deleteerror', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})

try:
zone.publish()
except DynectUpdateError:
sentry.captureException(
extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
"txt_record": txt_record.txtdata}
)
metrics.send('delete_txt_record_publish_error', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})


def delete_acme_txt_records(domain):
Expand All @@ -171,7 +196,16 @@ def delete_acme_txt_records(domain):
all_txt_records = node.get_all_records_by_type('TXT')
for txt_record in all_txt_records:
current_app.logger.debug("Deleting TXT record name: {0}".format(fqdn))
txt_record.delete()
try:
txt_record.delete()
except DynectDeleteError:
sentry.captureException(
extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
"txt_record": txt_record.txtdata}
)
metrics.send('delete_txt_record_deleteerror', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
zone.publish()


Expand Down
7 changes: 4 additions & 3 deletions lemur/plugins/lemur_acme/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,9 @@ def request_certificate(self, acme_client, authorizations, order):
try:
orderr = acme_client.finalize_order(order, deadline)
except AcmeError:
sentry.captureException()
sentry.captureException(extra={"order_url": order.uri})
metrics.send('request_certificate_error', 'counter', 1)
current_app.logger.error(f"Unable to resolve Acme order: {order}", exc_info=True)
current_app.logger.error(f"Unable to resolve Acme order: {order.uri}", exc_info=True)
raise

pem_certificate = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
Expand Down Expand Up @@ -289,9 +289,10 @@ def cleanup_dns_challenges(self, acme_client, authorizations):
dns_challenges = authz_record.dns_challenge
host_to_validate = self.maybe_remove_wildcard(authz_record.host)
host_to_validate = self.maybe_add_extension(host_to_validate, dns_provider_options)
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
for dns_challenge in dns_challenges:
try:
dns_provider.delete_txt_record(
dns_provider_plugin.delete_txt_record(
authz_record.change_id,
account_number,
dns_challenge.validation_domain_name(host_to_validate),
Expand Down
23 changes: 18 additions & 5 deletions lemur/plugins/lemur_aws/elb.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from retrying import retry

from lemur.extensions import metrics
from lemur.extensions import metrics, sentry
from lemur.exceptions import InvalidListener
from lemur.plugins.lemur_aws.sts import sts_client

Expand Down Expand Up @@ -149,27 +149,40 @@ def describe_listeners_v2(**kwargs):


@sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def describe_load_balancer_policies(load_balancer_name, policy_names, **kwargs):
"""
Fetching all policies currently associated with an ELB.
:param load_balancer_name:
:return:
"""
return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name, PolicyNames=policy_names)
try:
return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name,
PolicyNames=policy_names)
except Exception as e: # noqa
metrics.send('describe_load_balancer_policies_fail', 'counter', 1,
metric_tags={"load_balancer_name": load_balancer_name, "policy_names": policy_names, "error": e})
sentry.captureException(extra={"load_balancer_name": load_balancer_name, "policy_names": policy_names})
raise


@sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def describe_ssl_policies_v2(policy_names, **kwargs):
"""
Fetching all policies currently associated with an ELB.
:param policy_names:
:return:
"""
return kwargs['client'].describe_ssl_policies(Names=policy_names)
try:
return kwargs['client'].describe_ssl_policies(Names=policy_names)
except Exception as e: # noqa
metrics.send('describe_ssl_policies_v2_fail', 'counter', 1,
metric_tags={"policy_names": policy_names, "error": e})
sentry.captureException(extra={"policy_names": policy_names})
raise


@sts_client('elb')
Expand Down

0 comments on commit 2bc604e

Please sign in to comment.