Skip to content

Commit

Permalink
Better exception handling, logging, and metrics for ACME flow
Browse files Browse the repository at this point in the history
  • Loading branch information
castrapel committed Apr 24, 2019
1 parent ccd0bde commit 272285f
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 13 deletions.
28 changes: 23 additions & 5 deletions lemur/plugins/lemur_acme/dyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,21 @@
from dyn.tm.zones import Node, Zone, get_all_zones
from flask import current_app

from lemur.extensions import metrics, sentry


def get_dynect_session():
dynect_session = DynectSession(
current_app.config.get('ACME_DYN_CUSTOMER_NAME', ''),
current_app.config.get('ACME_DYN_USERNAME', ''),
current_app.config.get('ACME_DYN_PASSWORD', ''),
)
try:
dynect_session = DynectSession(
current_app.config.get('ACME_DYN_CUSTOMER_NAME', ''),
current_app.config.get('ACME_DYN_USERNAME', ''),
current_app.config.get('ACME_DYN_PASSWORD', ''),
)
except Exception as e:
sentry.captureException()
metrics.send('get_dynect_session_fail', 'counter', 1)
current_app.logger.debug("Unable to establish connection to Dyn", exc_info=True)
raise
return dynect_session


Expand All @@ -30,10 +38,12 @@ def _has_dns_propagated(name, token):
for txt_record in rdata.strings:
txt_records.append(txt_record.decode("utf-8"))
except dns.exception.DNSException:
metrics.send('has_dns_propagated_fail', 'counter', 1)
return False

for txt_record in txt_records:
if txt_record == token:
metrics.send('has_dns_propagated_success', 'counter', 1)
return True

return False
Expand All @@ -46,10 +56,12 @@ def wait_for_dns_change(change_id, account_number=None):
status = _has_dns_propagated(fqdn, token)
current_app.logger.debug("Record status for fqdn: {}: {}".format(fqdn, status))
if status:
metrics.send('wait_for_dns_change_success', 'counter', 1)
break
time.sleep(20)
if not status:
# TODO: Delete associated DNS text record here
metrics.send('wait_for_dns_change_fail', 'counter', 1)
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
return

Expand All @@ -67,6 +79,7 @@ def get_zone_name(domain):
if z.name.count(".") > zone_name.count("."):
zone_name = z.name
if not zone_name:
metrics.send('dyn_no_zone_name', 'counter', 1)
raise Exception("No Dyn zone found for domain: {}".format(domain))
return zone_name

Expand Down Expand Up @@ -99,6 +112,8 @@ def create_txt_record(domain, token, account_number):
"Record already exists: {}".format(domain, token, e), exc_info=True
)
else:
metrics.send('create_txt_record_error', 'counter', 1)
sentry.captureException()
raise

change_id = (fqdn, token)
Expand All @@ -122,6 +137,8 @@ def delete_txt_record(change_id, account_number, domain, token):
try:
all_txt_records = node.get_all_records_by_type('TXT')
except DynectGetError:
sentry.captureException()
metrics.send('delete_txt_record_error', 'counter', 1)
# No Text Records remain or host is not in the zone anymore because all records have been deleted.
return
for txt_record in all_txt_records:
Expand Down Expand Up @@ -178,6 +195,7 @@ def get_authoritative_nameserver(domain):

rcode = response.rcode()
if rcode != dns.rcode.NOERROR:
metrics.send('get_authoritative_nameserver_error', 'counter', 1)
if rcode == dns.rcode.NXDOMAIN:
raise Exception('%s does not exist.' % sub)
else:
Expand Down
48 changes: 40 additions & 8 deletions lemur/plugins/lemur_acme/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from lemur.common.utils import generate_private_key
from lemur.dns_providers import service as dns_provider_service
from lemur.exceptions import InvalidAuthority, InvalidConfiguration, UnknownProvider
from lemur.extensions import metrics, sentry
from lemur.plugins import lemur_acme as acme
from lemur.plugins.bases import IssuerPlugin
from lemur.plugins.lemur_acme import cloudflare, dyn, route53
Expand All @@ -47,7 +48,9 @@ def __init__(self):
try:
self.all_dns_providers = dns_provider_service.get_all_dns_providers()
except Exception as e:
current_app.logger.error("Unable to fetch DNS Providers: {}".format(e))
metrics.send('AcmeHandler_init_error', 'counter', 1)
sentry.captureException()
current_app.logger.error(f"Unable to fetch DNS Providers: {e}")
self.all_dns_providers = []

def find_dns_challenge(self, authorizations):
Expand Down Expand Up @@ -94,6 +97,7 @@ def complete_dns_challenge(self, acme_client, authz_record):
current_app.logger.debug("Finalizing DNS challenge for {0}".format(authz_record.authz[0].body.identifier.value))
dns_providers = self.dns_providers_for_domain.get(authz_record.host)
if not dns_providers:
metrics.send('complete_dns_challenge_error_no_dnsproviders', 'counter', 1)
raise Exception("No DNS providers found for domain: {}".format(authz_record.host))

for dns_provider in dns_providers:
Expand All @@ -102,7 +106,15 @@ def complete_dns_challenge(self, acme_client, authz_record):
account_number = dns_provider_options.get("account_id")
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
for change_id in authz_record.change_id:
dns_provider_plugin.wait_for_dns_change(change_id, account_number=account_number)
try:
dns_provider_plugin.wait_for_dns_change(change_id, account_number=account_number)
except Exception:
metrics.send('complete_dns_challenge_error', 'counter', 1)
sentry.captureException()
current_app.logger.debug(
f"Unable to resolve DNS challenge for change_id: {change_id}, account_id: "
f"{account_number}", exc_info=True)
raise

for dns_challenge in authz_record.dns_challenge:
response = dns_challenge.response(acme_client.client.net.key)
Expand All @@ -114,6 +126,7 @@ def complete_dns_challenge(self, acme_client, authz_record):
)

if not verified:
metrics.send('complete_dns_challenge_verification_error', 'counter', 1)
raise ValueError("Failed verification")

time.sleep(5)
Expand All @@ -129,7 +142,9 @@ def request_certificate(self, acme_client, authorizations, order):
try:
orderr = acme_client.finalize_order(order, deadline)
except AcmeError:
current_app.logger.error("Unable to resolve Acme order: {}".format(order), exc_info=True)
sentry.captureException()
metrics.send('request_certificate_error', 'counter', 1)
current_app.logger.error(f"Unable to resolve Acme order: {order}", exc_info=True)
raise

pem_certificate = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
Expand Down Expand Up @@ -196,6 +211,7 @@ def get_authorizations(self, acme_client, order, order_info):

for domain in order_info.domains:
if not self.dns_providers_for_domain.get(domain):
metrics.send('get_authorizations_no_dns_provider_for_domain', 'counter', 1)
raise Exception("No DNS providers found for domain: {}".format(domain))
for dns_provider in self.dns_providers_for_domain[domain]:
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
Expand Down Expand Up @@ -284,6 +300,8 @@ def cleanup_dns_challenges(self, acme_client, authorizations):
except Exception as e:
# If this fails, it's most likely because the record doesn't exist (It was already cleaned up)
# or we're not authorized to modify it.
metrics.send('cleanup_dns_challenges_error', 'counter', 1)
sentry.captureException()
pass

def get_dns_provider(self, type):
Expand Down Expand Up @@ -378,12 +396,15 @@ def get_ordered_certificate(self, pending_cert):
try:
order = acme_client.new_order(pending_cert.csr)
except WildcardUnsupportedError:
metrics.send('get_ordered_certificate_wildcard_unsupported', 'counter', 1)
raise Exception("The currently selected ACME CA endpoint does"
" not support issuing wildcard certificates.")
try:
authorizations = self.acme.get_authorizations(acme_client, order, order_info)
except ClientError:
current_app.logger.error("Unable to resolve pending cert: {}".format(pending_cert.name), exc_info=True)
sentry.captureException()
metrics.send('get_ordered_certificate_error', 'counter', 1)
current_app.logger.error(f"Unable to resolve pending cert: {pending_cert.name}", exc_info=True)
return False

authorizations = self.acme.finalize_authorizations(acme_client, authorizations)
Expand Down Expand Up @@ -418,6 +439,8 @@ def get_ordered_certificates(self, pending_certs):
try:
order = acme_client.new_order(pending_cert.csr)
except WildcardUnsupportedError:
sentry.captureException()
metrics.send('get_ordered_certificates_wildcard_unsupported_error', 'counter', 1)
raise Exception("The currently selected ACME CA endpoint does"
" not support issuing wildcard certificates.")

Expand All @@ -430,7 +453,13 @@ def get_ordered_certificates(self, pending_certs):
"order": order,
})
except (ClientError, ValueError, Exception) as e:
current_app.logger.error("Unable to resolve pending cert: {}".format(pending_cert), exc_info=True)
sentry.captureException()
metrics.send('get_ordered_certificates_pending_creation_error', 'counter', 1)
current_app.logger.error(f"Unable to resolve pending cert: {pending_cert}", exc_info=True)

error = e
if globals().get("order") and order:
error += f" Order uri: {order.uri}"
certs.append({
"cert": False,
"pending_cert": pending_cert,
Expand Down Expand Up @@ -459,14 +488,17 @@ def get_ordered_certificates(self, pending_certs):
"pending_cert": entry["pending_cert"],
})
except (PollError, AcmeError, Exception) as e:
sentry.captureException()
metrics.send('get_ordered_certificates_resolution_error', 'counter', 1)
order_url = order.uri
error = f"{e}. Order URI: {order_url}"
current_app.logger.error(
"Unable to resolve pending cert: {}. "
"Check out {} for more information.".format(pending_cert, order_url), exc_info=True)
f"Unable to resolve pending cert: {pending_cert}. "
f"Check out {order_url} for more information.", exc_info=True)
certs.append({
"cert": False,
"pending_cert": entry["pending_cert"],
"last_error": e,
"last_error": error,
})
# Ensure DNS records get deleted
self.acme.cleanup_dns_challenges(
Expand Down

0 comments on commit 272285f

Please sign in to comment.