Skip to content

Commit

Permalink
Merge pull request #8 from nikhgupta/fix-deprecation
Browse files (browse the repository at this point in the history)
Fix deprecated code.
  • Loading branch information
dangra committed Apr 21, 2014
2 parents 15b61a8 + 736423b commit 3514df8
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion dirbot/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
NEWSPIDER_MODULE = 'dirbot.spiders'
DEFAULT_ITEM_CLASS = 'dirbot.items.Website'

- ITEM_PIPELINES = ['dirbot.pipelines.FilterWordsPipeline']
+ ITEM_PIPELINES = {'dirbot.pipelines.FilterWordsPipeline': 1}
12 changes: 6 additions & 6 deletions dirbot/spiders/dmoz.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from scrapy.spider import Spider
- from scrapy.selector import HtmlXPathSelector
+ from scrapy.selector import Selector

from dirbot.items import Website

Expand All @@ -20,15 +20,15 @@ def parse(self, response):
@url http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/
@scrapes name
"""
- hxs = HtmlXPathSelector(response)
- sites = hxs.select('//ul[@class="directory-url"]/li')
+ sel = Selector(response)
+ sites = sel.xpath('//ul[@class="directory-url"]/li')
items = []

for site in sites:
item = Website()
- item['name'] = site.select('a/text()').extract()
- item['url'] = site.select('a/@href').extract()
- item['description'] = site.select('text()').re('-\s([^\n]*?)\\n')
+ item['name'] = site.xpath('a/text()').extract()
+ item['url'] = site.xpath('a/@href').extract()
+ item['description'] = site.xpath('text()').re('-\s([^\n]*?)\\n')
items.append(item)

return items

0 comments on commit 3514df8

Please sign in to comment.