Skip to content

Commit

Permalink
limit argument cache size
Browse files Browse the repository at this point in the history
  • Loading branch information
kmike committed Apr 15, 2016
1 parent 72dd737 commit 37e4bf4
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 30 deletions.
50 changes: 48 additions & 2 deletions splash/argument_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
from __future__ import absolute_import
import json
import hashlib
from collections import OrderedDict


class ArgumentCache(object):
"""
>>> cache = ArgumentCache()
>>> "foo" in cache
False
>>> cache['foo']
Traceback (most recent call last):
...
KeyError: 'foo'
>>> len(cache)
0
>>> key = cache.add("Hello, world!")
Expand Down Expand Up @@ -38,16 +43,57 @@ class ArgumentCache(object):
>>> cache.clear()
>>> len(cache)
0
Size of ArgumentCache can be limited:
>>> cache = ArgumentCache(0)
Traceback (most recent call last):
...
ValueError: maxsize must be greater than 0
>>> cache = ArgumentCache(2) # limit it to 2 elements
>>> cache.add_many(['value1', 'value2'])
['daf626c4ebd6bdd697e043111454304e5fb1459e', '849988af22dbd04d3e353caf77f9d81241ca9ee2']
>>> len(cache)
2
>>> cache.add("Hello, world!")
'bea2c9d7fd040292e0424938af39f7d6334e8d8a'
>>> len(cache)
2
>>> cache["bea2c9d7fd040292e0424938af39f7d6334e8d8a"]
'Hello, world!'
>>> cache['849988af22dbd04d3e353caf77f9d81241ca9ee2']
'value2'
>>> cache['daf626c4ebd6bdd697e043111454304e5fb1459e']
Traceback (most recent call last):
...
KeyError: 'daf626c4ebd6bdd697e043111454304e5fb1459e'
>>> cache.add("foo")
'd465e627f9946f2fa0d2dc0fc04e5385bc6cd46d'
>>> len(cache)
2
>>> 'bea2c9d7fd040292e0424938af39f7d6334e8d8a' in cache
False
"""
def __init__(self):
self._values = {} # TODO: LRU cache
def __init__(self, maxsize=None):
if maxsize is None:
maxsize = float("+inf")
if maxsize <= 0:
raise ValueError("maxsize must be greater than 0")
self.maxsize = maxsize
self._values = OrderedDict()

def add(self, value):
    """Store *value* as the most recently used entry and return its key.

    The key is whatever ``self.get_key(value)`` returns. If the key is
    already cached, the entry is refreshed (moved to the newest position)
    and nothing is evicted. Otherwise the oldest entries are dropped
    until there is room for the new one.
    """
    cache_key = self.get_key(value)
    store = self._values
    if cache_key not in store:
        # New entry: evict from the oldest end until it fits.
        while len(store) >= self.maxsize:
            store.popitem(last=False)
    else:
        # Known entry: remove it so the assignment below re-inserts it
        # at the newest end of the ordering.
        del store[cache_key]
    store[cache_key] = value
    return cache_key

def __getitem__(self, key):
self._values.move_to_end(key)
return self._values[key]

def __contains__(self, key):
Expand Down
3 changes: 3 additions & 0 deletions splash/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
# pool options
SLOTS = 50

# argument cache option
ARGUMENT_CACHE_MAX_ENTRIES = 500

# security options
ALLOWED_SCHEMES = ['http', 'https', 'data', 'ftp', 'sftp', 'ws', 'wss']
JS_CROSS_DOMAIN_ENABLED = False
Expand Down
65 changes: 40 additions & 25 deletions splash/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,17 @@ def _format_error(self, code, exc):
return err


cache = ArgumentCache()


class BaseRenderResource(_ValidatingResource):

isLeaf = True
content_type = "text/html; charset=utf-8"

def __init__(self, pool, max_timeout):
def __init__(self, pool, max_timeout, argument_cache):
Resource.__init__(self)
self.pool = pool
self.js_profiles_path = self.pool.js_profiles_path
self.max_timeout = max_timeout
self.argument_cache = argument_cache

def render_GET(self, request):
#log.msg("%s %s %s %s" % (id(request), request.method, request.path, request.args))
Expand All @@ -101,18 +99,18 @@ def render_GET(self, request):

# process argument cache
original_options = render_options.data.copy()
expired_args = render_options.get_expired_args(cache)
expired_args = render_options.get_expired_args(self.argument_cache)
if expired_args:
error = self._write_expired_args(request, expired_args)
self._log_stats(request, original_options, error)
return b"\n"

saved_args = render_options.save_args_to_cache(cache)
saved_args = render_options.save_args_to_cache(self.argument_cache)
if saved_args:
value = ';'.join("{}={}".format(name, value)
for name, value in saved_args)
request.setHeader(b'X-Splash-Saved-Arguments', value.encode('utf8'))
render_options.load_cached_args(cache)
render_options.load_cached_args(self.argument_cache)

# check arguments before starting the render
render_options.get_filters(self.pool)
Expand Down Expand Up @@ -284,8 +282,10 @@ class ExecuteLuaScriptResource(BaseRenderResource):
def __init__(self, pool, sandboxed,
lua_package_path,
lua_sandbox_allowed_modules,
max_timeout):
BaseRenderResource.__init__(self, pool, max_timeout)
max_timeout,
argument_cache,
):
BaseRenderResource.__init__(self, pool, max_timeout, argument_cache)
self.sandboxed = sandboxed
self.lua_package_path = lua_package_path
self.lua_sandbox_allowed_modules = lua_sandbox_allowed_modules
Expand Down Expand Up @@ -341,8 +341,9 @@ def _get_render(self, request, options):
class DebugResource(Resource):
isLeaf = True

def __init__(self, pool, warn=False):
def __init__(self, pool, argument_cache, warn=False):
Resource.__init__(self)
self.argument_cache = argument_cache
self.pool = pool
self.warn = warn

Expand All @@ -354,13 +355,14 @@ def render_GET(self, request):
"qsize": len(self.pool.queue.pending),
"maxrss": resource.getrusage(resource.RUSAGE_SELF).ru_maxrss,
"fds": get_num_fds(),
"argcache": len(self.argument_cache)
}
if self.warn:
info['WARNING'] = "/debug endpoint is deprecated. " \
"Please use /_debug instead."
# info['leaks'] = get_leaks()

return (json.dumps(info)).encode('utf-8')
return (json.dumps(info, sort_keys=True)).encode('utf-8')

def get_repr(self, render):
if hasattr(render, 'url'):
Expand All @@ -372,13 +374,20 @@ class ClearCachesResource(Resource):
isLeaf = True
content_type = "application/json"

def __init__(self, argument_cache):
Resource.__init__(self)
self.argument_cache = argument_cache

def render_POST(self, request):
argcache_size = len(self.argument_cache)
self.argument_cache.clear()
clear_caches()
unreachable = gc.collect()
return json.dumps({
"status": "ok",
"pyobjects_collected": unreachable
}).encode('utf-8')
"pyobjects_collected": unreachable,
"cached_args_removed": argcache_size,
}, sort_keys=True).encode('utf-8')


class PingResource(Resource):
Expand All @@ -389,7 +398,7 @@ def render_GET(self, request):
return (json.dumps({
"status": "ok",
"maxrss": get_ru_maxrss(),
})).encode('utf-8')
}, sort_keys=True)).encode('utf-8')



Expand Down Expand Up @@ -554,30 +563,36 @@ class Root(Resource):
def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled,
lua_package_path,
lua_sandbox_allowed_modules,
max_timeout):
max_timeout,
argument_cache_max_entries,
):
Resource.__init__(self)
self.argument_cache = ArgumentCache(argument_cache_max_entries)
self.ui_enabled = ui_enabled
self.lua_enabled = lua_enabled
self.putChild(b"render.html", RenderHtmlResource(pool, max_timeout))
self.putChild(b"render.png", RenderPngResource(pool, max_timeout))
self.putChild(b"render.jpeg", RenderJpegResource(pool, max_timeout))
self.putChild(b"render.json", RenderJsonResource(pool, max_timeout))
self.putChild(b"render.har", RenderHarResource(pool, max_timeout))

self.putChild(b"_debug", DebugResource(pool))
self.putChild(b"_gc", ClearCachesResource())

_args = pool, max_timeout, self.argument_cache
self.putChild(b"render.html", RenderHtmlResource(*_args))
self.putChild(b"render.png", RenderPngResource(*_args))
self.putChild(b"render.jpeg", RenderJpegResource(*_args))
self.putChild(b"render.json", RenderJsonResource(*_args))
self.putChild(b"render.har", RenderHarResource(*_args))

self.putChild(b"_debug", DebugResource(pool, self.argument_cache))
self.putChild(b"_gc", ClearCachesResource(self.argument_cache))
self.putChild(b"_ping", PingResource())

# backwards compatibility
self.putChild(b"debug", DebugResource(pool, warn=True))
self.putChild(b"debug", DebugResource(pool, self.argument_cache, warn=True))

if self.lua_enabled and ExecuteLuaScriptResource is not None:
self.putChild(b"execute", ExecuteLuaScriptResource(
pool=pool,
sandboxed=lua_sandbox_enabled,
lua_package_path=lua_package_path,
lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
max_timeout=max_timeout
max_timeout=max_timeout,
argument_cache=self.argument_cache,
))

if self.ui_enabled:
Expand Down
17 changes: 14 additions & 3 deletions splash/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def parse_opts(jupyter=False, argv=sys.argv):
help="disable web UI")
op.add_option("--disable-lua", action="store_true", default=False,
help="disable Lua scripting")
op.add_option("--argument-cache-max-entries", type="int",
default=defaults.ARGUMENT_CACHE_MAX_ENTRIES,
help="maximum number of entries in arguments cache (default: %default)")

opts, args = op.parse_args(argv)

Expand All @@ -84,11 +87,11 @@ def parse_opts(jupyter=False, argv=sys.argv):
opts.port = None
opts.slots = None
opts.max_timeout = None
opts.argument_cache_max_entries = None

return opts, args



def start_logging(opts):
import twisted
from twisted.python import log
Expand Down Expand Up @@ -168,6 +171,7 @@ def splash_server(portnum, slots, network_manager_factory, max_timeout,
lua_sandbox_enabled=True,
lua_package_path="",
lua_sandbox_allowed_modules=(),
argument_cache_max_entries=None,
verbosity=None):
from twisted.internet import reactor
from twisted.web.server import Site
Expand All @@ -182,6 +186,9 @@ def splash_server(portnum, slots, network_manager_factory, max_timeout,
slots = defaults.SLOTS if slots is None else slots
log.msg("slots=%s" % slots)

if argument_cache_max_entries:
log.msg("argument_cache_max_entries=%s" % argument_cache_max_entries)

pool = RenderPool(
slots=slots,
network_manager_factory=network_manager_factory,
Expand Down Expand Up @@ -211,7 +218,8 @@ def splash_server(portnum, slots, network_manager_factory, max_timeout,
lua_sandbox_enabled=lua_sandbox_enabled,
lua_package_path=lua_package_path,
lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
max_timeout=max_timeout
max_timeout=max_timeout,
argument_cache_max_entries=argument_cache_max_entries,
)
factory = Site(root)
reactor.listenTCP(portnum, factory)
Expand Down Expand Up @@ -255,6 +263,7 @@ def default_splash_server(portnum, max_timeout, slots=None,
lua_sandbox_enabled=True,
lua_package_path="",
lua_sandbox_allowed_modules=(),
argument_cache_max_entries=None,
verbosity=None,
server_factory=splash_server):
from splash import network_manager
Expand All @@ -278,7 +287,8 @@ def default_splash_server(portnum, max_timeout, slots=None,
lua_package_path=lua_package_path,
lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
verbosity=verbosity,
max_timeout=max_timeout
max_timeout=max_timeout,
argument_cache_max_entries=argument_cache_max_entries,
)


Expand Down Expand Up @@ -358,6 +368,7 @@ def main(jupyter=False, argv=sys.argv, server_factory=splash_server):
lua_sandbox_allowed_modules=opts.lua_sandbox_allowed_modules.split(";"),
verbosity=opts.verbosity,
max_timeout=opts.max_timeout,
argument_cache_max_entries=opts.argument_cache_max_entries,
server_factory=server_factory,
)
signal.signal(signal.SIGUSR1, lambda s, f: traceback.print_stack(f))
Expand Down

0 comments on commit 37e4bf4

Please sign in to comment.