Skip to content

Commit

Permalink
Merge walbermr's fork into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
cardoso-neto committed May 7, 2019
2 parents 4f4b4f9 + 6e4369a commit 544d34b
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion http_parser/page_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,8 +15,23 @@ def parse(self):

if str(type(tag)) == "<class 'bs4.element.Tag'>":

#look for global.document.metadata
if tag.name == 'script':
continue
if tag.string:
position = tag.string.find('global.document.metadata=')
if position == -1:
continue
else:
a = 'global.document.metadata='
t = Tag('global.document.metadata')

s = tag.string[position + len(a):]
s = s[:s.find('\n')-1]

t.add_content(s)

results.append(t.get_data())


# Find tags with no children (base tags)
if tag.contents:
Expand Down

0 comments on commit 544d34b

Please sign in to comment.