Skip to content

Commit

Permalink
做了一些分离的工作
Browse files Browse the repository at this point in the history
  • Loading branch information
root committed Jul 6, 2013
1 parent 4015766 commit dbf87fe
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 54 deletions.
62 changes: 62 additions & 0 deletions mythread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/python
#coding=utf-8
import threading
from Queue import Queue
from dytt8.dytt8 import dytt8
from xunbo.xunbo import xunbo
# `dy` below is an instance of the dytt8 site class, constructed with 10
# (presumably a page count -- TODO confirm against the dytt8 class).
# NOTE(review): `dy` is not referenced again in the visible part of this module.
dy = dytt8(10)
# NOTE(review): this rebinds the imported class name `xunbo` to an instance,
# so the class is no longer reachable under that name after this line.
xunbo = xunbo(3)

# Module-level list that ThreadUrl.run() appends results to and that
# mythread() prints once the work queue has been drained.
ftp_urls = []

class ThreadUrl(threading.Thread):
    """Worker thread that turns queued page URLs into download links.

    Each worker pulls URLs from ``queue`` forever, asks ``site.ftp_url(url)``
    for the matching link and keeps every result whose length is greater
    than 1.  Run it as a daemon thread: ``run()`` never returns on its own.

    queue:   queue object providing ``get()`` and ``task_done()``.
    site:    site object (an instance, not the class) exposing ``ftp_url(url)``.
    results: optional list that receives the links; when omitted the
             module-level ``ftp_urls`` list is used, as before.
    """

    def __init__(self, queue, site, results=None):
        threading.Thread.__init__(self)
        self.queue = queue
        self.site = site  # an instance of a site class
        self.results = results

    def run(self):
        # Local import keeps this block self-contained.
        import logging

        while True:
            # Fetch outside the try block so task_done() below is only ever
            # called for an item that was actually taken from the queue.
            url = self.queue.get()
            try:
                link = self.site.ftp_url(url)
                if len(link) > 1:
                    sink = ftp_urls if self.results is None else self.results
                    sink.append(link)
            except Exception:
                # Best effort: one bad page must not stop the crawl, but the
                # failure is reported instead of being silently dropped.
                logging.exception("failed to extract a link from %r", url)
            finally:
                # Always acknowledge the item, otherwise queue.join() in the
                # caller would block forever.
                self.queue.task_done()



def mythread(site, num=20):
    """Crawl ``site`` with a pool of worker threads and print what was found.

    site: a site object (for example ``dytt8(10)``); ``site.http_url()`` is
          iterated for the URLs to process and the object itself is handed
          to every ThreadUrl worker.
    num:  number of worker threads to start (20 by default).
    """
    jobs = Queue()

    # Start the daemon workers first; they simply block on the empty queue
    # until URLs arrive.
    for _ in range(num):
        worker = ThreadUrl(jobs, site)
        worker.daemon = True
        worker.start()

    # Feed every URL reported by the site into the shared queue.
    for page_url in site.http_url():
        jobs.put(page_url)

    # Wait until each queued URL has been acknowledged with task_done().
    jobs.join()

    # Dump everything accumulated in the module-level result list.
    for found in ftp_urls:
        print(found)
# Script entry point: when this module is executed directly (not imported),
# crawl with the module-level `xunbo` instance and mythread()'s default
# number of worker threads.
if __name__ == '__main__':
    mythread(xunbo)
60 changes: 6 additions & 54 deletions spider.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,12 @@
#!/usr/bin/python
#coding=utf-8
import threading
from Queue import Queue
import sqlite3

from dytt8.dytt8 import dytt8
from xunbo.xunbo import xunbo
from mythread import mythread

# Site object used by ThreadUrl.run() and by the __main__ driver below;
# built with 10 (presumably a page count -- TODO confirm against dytt8).
dy = dytt8(10)

# SQLite connection opened at import time from a hard-coded absolute path.
db = sqlite3.connect("/Users/chenqing/hellopy/spider/spider.db")

# Cursor on that connection.
# NOTE(review): `link` is not used anywhere in the lines visible here.
link = db.cursor()

# Shared list that the worker threads append results to and that the
# driver prints after the queue has been drained.
ftp_urls = []


class ThreadUrl(threading.Thread):
    """Worker thread that turns queued page URLs into download links.

    Each worker pulls URLs from ``queue`` forever, asks the site object's
    ``ftp_url(url)`` for the matching link and keeps every result whose
    length is greater than 1.  Run it as a daemon thread: ``run()`` never
    returns on its own.

    queue:   queue object providing ``get()`` and ``task_done()``.
    site:    optional site object exposing ``ftp_url(url)``; when omitted
             the module-level ``dy`` instance is used, as before.
    results: optional list that receives the links; when omitted the
             module-level ``ftp_urls`` list is used, as before.
    """

    def __init__(self, queue, site=None, results=None):
        threading.Thread.__init__(self)
        self.queue = queue
        self.site = site
        self.results = results

    def run(self):
        # Local import keeps this block self-contained.
        import logging

        while True:
            url = self.queue.get()
            try:
                site = dy if self.site is None else self.site
                link = site.ftp_url(url)
                if len(link) > 1:
                    sink = ftp_urls if self.results is None else self.results
                    sink.append(link)
            except Exception:
                # A failing page used to kill the worker before task_done()
                # ran, which left queue.join() waiting forever.  Report the
                # error and keep the worker alive instead.
                logging.exception("failed to extract a link from %r", url)
            finally:
                # Always acknowledge the item so queue.join() can return.
                self.queue.task_done()



if __name__ == '__main__':
    # Work queue shared by all worker threads.
    jobs = Queue()

    # Start 20 daemon workers; they block on the empty queue until URLs
    # are put on it.
    for _ in range(20):
        worker = ThreadUrl(jobs)
        worker.daemon = True
        worker.start()

    # Queue every URL reported by the dytt8 site object.
    for page_url in dy.http_url():
        jobs.put(page_url)

    # Wait until every queued URL has been acknowledged with task_done().
    jobs.join()

    # Print everything the workers collected.
    for found in ftp_urls:
        print(found)
# Disabled alternative that would build a dytt8 site object instead:
#dytt8 = dytt8(5)

# Progress message; the text says the crawl of movie links from the first
# three pages of the xunbo site is starting.
print "开始抓取迅播前三页的电影链接。。。"
# NOTE(review): rebinds the imported class name `xunbo` to an instance
# built with 3 (presumably the page count mentioned in the message above).
xunbo = xunbo(3)

# Close the SQLite connection opened earlier in this file.
# NOTE(review): this happens before the crawl starts; nothing in the visible
# lines writes crawl results to the database.
db.close()
# Run the threaded crawl against the xunbo site object with the default
# thread count of mythread().
mythread(xunbo)

0 comments on commit dbf87fe

Please sign in to comment.