Skip to content

Commit

Permalink
Multiple minor bugfixes
Browse files: browse the repository at this point in the history
  • Loading branch information
dhondta committed Mar 4, 2018
1 parent 164f804 commit f8801ef
Showing 1 changed file with 31 additions and 21 deletions.
52 changes: 31 additions & 21 deletions webgrep
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,16 +40,16 @@ P3 = sys.version_info >= (3,0)
# Python2/3 specific imports
if P3:
from urllib.request import getproxies
from urllib.parse import urlparse, urljoin
from urllib.parse import unquote, urlparse, urljoin
else:
from urllib import getproxies
from urllib import getproxies, unquote
from urlparse import urljoin, urlparse
# BeautifulSoup
try:
import bs4
except ImportError:
print("BeautifulSoup is not installed !\nPlease run 'sudo pip{} install"
" beautifulsoup4' before continuing.".format(["", "3"][P3]))
print("BeautifulSoup is not installed !\nPlease run 'sudo pip install"
" beautifulsoup4' before continuing.")
sys.exit(1)
# colorize logging
try:
Expand All @@ -63,8 +63,8 @@ try:
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
except AttributeError:
print("Failed to disable warnings for requests !\nPlease run 'sudo pip{} "
"install --upgrade requests' to fix it.".format(["", "3"][P3]))
print("Failed to disable warnings for requests !\nPlease run 'sudo pip "
"install --upgrade requests' to fix it.")
sys.exit(1)
logging.getLogger("requests").setLevel(logging.CRITICAL)

Expand Down Expand Up @@ -94,8 +94,9 @@ CACHE_FILE = "cache.json"
# - value: tuple of the form (type, handler, message if not exists)
# NB: the handler always takes a Resource object in argument
# Handlers are automatically set as attributes in 'args'
b = lambda c: bytes(c, 'utf-8') if P3 and not isinstance(c, bytes) else \
c.encode('utf-8') if isinstance(c, unicode) else c
b = lambda c: (bytes(c, 'utf-8') if not isinstance(c, bytes) else c) if P3 \
else (c.encode('utf-8') if isinstance(c, unicode) else c)
s = lambda c: (c.decode("utf-8") if isinstance(c, bytes) else c) if P3 else c
get_cmd = lambda t: lambda r: Popen([t, r.rel_fn], stdout=PIPE,
stderr=DEVNULL, cwd=args.tmp).communicate()[0]

Expand All @@ -106,11 +107,12 @@ def css_unminifier(res, indent=2, maxlen=256):
if res.type != "style":
return
# CSS is considered minified if any line is longer than a given length
if any([len(l) > maxlen for l in res.content.split('\n')]):
res.content = re.sub("\*\/", "*/\r\n", res.content)
res.content = re.sub("\{", " {\r\n" + " " * indent, res.content)
res.content = re.sub(";", ";\r\n" + " " * indent, res.content)
res.content = re.sub("\}", ";\r\n}\r\n", res.content)
res.content, i = b(res.content), indent
if any([len(l) > maxlen for l in res.content.split(b('\n'))]):
res.content = re.sub(b("\*\/"), b("*/\r\n"), res.content)
res.content = re.sub(b("\{"), b(" {\r\n") + b(" ") * i, res.content)
res.content = re.sub(b(";"), b(";\r\n") + b(" ") * i, res.content)
res.content = re.sub(b("\}"), b(";\r\n}\r\n"), res.content)
return res.content


Expand All @@ -132,10 +134,10 @@ def inline_items(tag, rtype=None, attrs=None):
res.type = rtype
if not hasattr(res, "content"): # not in cache
logger.debug("> Extracting {}".format(res.abs_fn))
res.content = b(item.text)
res.content = item.text
res.preprocess()
with open(res.abs_fn, 'wb') as f:
f.write(res.content)
f.write(b(res.content))
item.decompose()
res.grep().handle()
return b(str(page.soup))
Expand Down Expand Up @@ -201,10 +203,10 @@ PAGE_PREPROCESSORS = {
"inline-style": ('function', inline_items("style"), None),
}
SCRIPT_PREPROCESSORS = {
"jsbeautifier": ('module', lambda r: jsbeautifier.beautify(r.content),
"jsbeautifier": ('module',
lambda r: b(jsbeautifier.beautify(s(r.content))),
"Python library required for deobfuscating Javascript ;"
"\n consider running 'sudo pip{} install jsbeautifier'"
.format(["2", "3"][P3])),
"\n consider running 'sudo pip install jsbeautifier'"),
}
STYLE_PREPROCESSORS = {
"unminifier": ('function', css_unminifier, None),
Expand Down Expand Up @@ -357,9 +359,16 @@ class Resource(object):
# first, decompose the path (data:[parsed.path])
# form: [category]/[type];[encoding],[data]
# e.g.: image/png;base64,...
rtype, data = parsed.path.split(";", 1)
self.type, ext = rtype.split('/', 1)
enc, data = data.split(",", 1)
try:
rtype, data = parsed.path.split(";", 1)
self.type, ext = rtype.split('/', 1)
enc, data = data.split(",", 1)
except ValueError: # occurs e.g. with "image/svg+xml,%3Csvg..."
rtype, data = parsed.path.split(",", 1)
self.type, ext = rtype.split('/', 1)
ext = ext.split("+", 1)[0]
data = unquote(data)
enc = "none"
try:
if enc.strip() not in ["none", "base64"]:
raise Exception("Bad image encoding")
Expand Down Expand Up @@ -611,6 +620,7 @@ class Resource(object):
for tool, handler in getattr(args, attr):
logger.debug("> Preprocessing with {}".format(tool))
self.content = handler(self)
self.content = b(self.content)

@staticmethod
def pprint_req(request):
Expand Down

0 comments on commit f8801ef

Please sign in to comment.