Skip to content

Commit

Permalink
fix cloudflare bot detection
Browse files Browse the repository at this point in the history
  • Loading branch information
freddy36 committed Jun 14, 2024
1 parent e5ebfac commit 4ebd4f4
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
22 changes: 20 additions & 2 deletions audiobookdl/sources/source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from . import networking
from audiobookdl import logging, AudiobookFile, Chapter, AudiobookMetadata, Cover, Result, Audiobook, BookId
from audiobookdl.exceptions import DataNotPresent
from audiobookdl.utils import CustomSSLContextHTTPAdapter

# External imports
import requests
Expand All @@ -11,6 +12,8 @@
import os
from http.cookiejar import MozillaCookieJar
from typing import Any, Dict, List, Optional, TypeVar, Generic
from ssl import SSLContext
import urllib3

T = TypeVar("T")

Expand All @@ -33,10 +36,10 @@ class Source(Generic[T]):
# Cache of previously loaded pages
__pages: Dict[str, bytes] = {}

def __init__(self, options):
def __init__(self, options: Any):
self.database_directory = os.path.join(options.database_directory, self.name)
self.skip_downloaded = options.skip_downloaded
self._session = requests.Session()
self._session: requests.Session = self.create_session(options)
if self.create_storage_dir:
os.makedirs(self.database_directory, exist_ok=True)

Expand Down Expand Up @@ -172,3 +175,18 @@ def find_all_in_page(self, url: str, regex: str, **kwargs) -> list:
# Aliases for helpers implemented in the sibling `networking` module
post_json = networking.post_json
get_json = networking.get_json
get_stream_files = networking.get_stream_files

def create_ssl_context(self, options: Any) -> SSLContext:
    """
    Build the SSLContext used for HTTPS connections.

    Starts from urllib3's default context and clears the
    SSL_OP_TLSEXT_PADDING option bit. `options` is not read here.
    """
    # SSL_OP_TLSEXT_PADDING is bit 4 (0x10) of the option mask.
    # Clearing it keeps the padding extension out of the TLS ClientHello,
    # which Cloudflare uses for bot detection. See issue #106
    tlsext_padding = 0x10
    context: SSLContext = urllib3.util.create_urllib3_context()
    context.options = context.options & ~tlsext_padding
    return context

def create_session(self, options: Any) -> requests.Session:
    """
    Create the requests session used for network access.

    HTTPS traffic is routed through an adapter that carries the
    SSLContext returned by `create_ssl_context`.
    """
    adapter = CustomSSLContextHTTPAdapter(self.create_ssl_context(options))
    http_session = requests.Session()
    # Mounting on "https://" replaces the adapter registered for that prefix
    http_session.mount("https://", adapter)
    return http_session
18 changes: 18 additions & 0 deletions audiobookdl/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import importlib.resources
from typing import Sequence
import shutil
from urllib3.poolmanager import PoolManager
from requests.adapters import HTTPAdapter
from ssl import SSLContext

def levenstein_distance(a: str, b: str) -> int:
"""
Expand Down Expand Up @@ -37,3 +40,18 @@ def read_asset_file(path: str) -> str:
def program_in_path(program: str) -> bool:
    """Returns True if `program` can be located with `shutil.which`"""
    located = shutil.which(program)
    return located is not None

class CustomSSLContextHTTPAdapter(HTTPAdapter):
    """Transport adapter that allows us to use a custom SSLContext."""

    def __init__(self, ssl_context: SSLContext, **kwargs) -> None:
        """
        Store `ssl_context` and initialise the underlying HTTPAdapter.

        Any extra keyword arguments are forwarded to `HTTPAdapter.__init__`.
        """
        # Has to be assigned before the base constructor runs, because the
        # base constructor calls `init_poolmanager`, which reads it
        self.ssl_context: SSLContext = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        """
        Create the pool manager with the custom SSLContext attached.

        Accepts the same arguments as `HTTPAdapter.init_poolmanager`
        (including extra pool keyword arguments) and delegates to it, so the
        base class stays responsible for constructing the pool manager.
        """
        pool_kwargs["ssl_context"] = self.ssl_context
        super().init_poolmanager(connections, maxsize, block=block, **pool_kwargs)

0 comments on commit 4ebd4f4

Please sign in to comment.