Skip to content

Commit

Permalink
简化写法
Browse files Browse the repository at this point in the history
  • Loading branch information
chyroc committed May 11, 2016
1 parent 3ba2ce2 commit 041008b
Showing 1 changed file with 21 additions and 23 deletions.
44 changes: 21 additions & 23 deletions wechatsogou.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,12 @@ def __get_elem_text(self, elem):

def __get_encoding_from_reponse(self, r):
    """Choose the text encoding to use for response ``r``.

    The encodings reported by
    ``requests.utils.get_encodings_from_content(r.text)`` take priority;
    when that call reports none, the result of
    ``requests.utils.get_encoding_from_headers(r.headers)`` is returned
    instead.

    Args:
        r: response object exposing ``text`` and ``headers``.

    Returns:
        The first encoding found in the content, otherwise whatever the
        header-based lookup returns.
    """
    declared = requests.utils.get_encodings_from_content(r.text)
    if not declared:
        # Nothing reported from the content: fall back to the headers.
        return requests.utils.get_encoding_from_headers(r.headers)
    return declared[0]

def get_session(self):
    """Create a ``Session`` and store it on ``self.session``.

    The session is assigned directly rather than through an empty ``with``
    block: a ``with`` statement runs the context manager's exit step as
    soon as its body finishes, which would hand callers a session that has
    already been through its exit/cleanup step.
    """
    self.session = Session()


def __get(self, url, host='', referer=''):
headers = {
"User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/45.0.2454.101 Chrome/45.0.2454.101 Safari/537.36',
Expand All @@ -80,10 +76,7 @@ def __get(self, url, host='', referer=''):
# 'http' : proip_http['http']+"://" + proip_http['ip'] + ":" + proip_http['duan'],
# 'https' : proip_http['http'] + "://" + proip_http['ip'] + ":" + proip_http['duan']
# }
if hasattr(self, 'session'):
req = self.session
else:
req = requests
req = self.session if hasattr(self, 'session') else requests
r = req.get(url, headers=headers) #, proxies=proxies
if r.status_code == requests.codes.ok:
r.encoding = self.__get_encoding_from_reponse(r)
Expand Down Expand Up @@ -111,14 +104,16 @@ def search_gzh(self, name, page=1):
def search_gzh_info(self, name, page):
text = self.search_gzh(name, page)
page = etree.HTML(text)
info_imgs = page.xpath(u"//div[@class='img-box']/img")
img = list()
info_imgs = page.xpath(u"//div[@class='img-box']/img")
for info_img in info_imgs:
img.append(info_img.attrib['src'])
info_urls = page.xpath(u"//div[@target='_blank']")

url = list()
info_urls = page.xpath(u"//div[@target='_blank']")
for info_url in info_urls:
url.append(info_url.attrib['href'])

name = list()
wechatid = list()
jieshao = list()
Expand All @@ -137,22 +132,24 @@ def search_gzh_info(self, name, page):
else:
jieshao.append(cache_re[2])
renzhen.append('')

qrcodes = list()
info_qrcodes = page.xpath(u"//div[@class='pos-ico']/div/img")
for info_qrcode in info_qrcodes:
qrcodes.append(info_qrcode.attrib['src'])

returns = list()
for i in range(len(qrcodes)):
returns.append(
{
'name':name[i],
'wechatid':wechatid[i],
'jieshao':jieshao[i],
'renzhen':renzhen[i],
'qrcode':qrcodes[i],
'img':img[i],
'url':url[i]
}
'name': name[i],
'wechatid': wechatid[i],
'jieshao': jieshao[i],
'renzhen': renzhen[i],
'qrcode': qrcodes[i],
'img': img[i],
'url': url[i]
}
)
return returns

Expand Down Expand Up @@ -213,7 +210,7 @@ def __deal_comment(self, text):
comment_text = self.__get(comment_req_url, 'mp.weixin.qq.com', 'http://mp.weixin.qq.com')
comment_dict = eval(comment_text)
ret = comment_dict['base_resp']['ret']
errmsg = comment_dict['base_resp']['errmsg']
errmsg = comment_dict['base_resp']['errmsg'] if comment_dict['base_resp']['errmsg'] else 'ret:' + str(ret)
if ret != 0:
raise WechatSogouException(errmsg)
return comment_dict
Expand All @@ -232,15 +229,16 @@ def __deal_related(self, article):
if ret != 0:
raise WechatSogouException(errmsg)
return related_dict

def __deal_content(self, text):
    """Extract the article body from page markup and derive two reduced forms.

    Args:
        text: HTML source to search.

    Returns:
        A 3-tuple ``(content_html, content_rich, content_text)``:

        * ``content_html`` - the markup captured inside the first
          ``<div class="rich_media_content " id="js_content">`` element,
          up to the first ``</div>`` that follows it.
        * ``content_rich`` - ``content_html`` with every ``<...>`` span
          removed unless it begins with ``<img`` or ``<br``.
        * ``content_text`` - ``content_rich`` with each ``<img ... />``
          rewritten to carry only its ``src`` attribute.

    Raises:
        IndexError: if the ``js_content`` div is not present in ``text``.
    """
    body_pattern = r'<div class="rich_media_content " id="js_content">(.*?)</div>'
    img_pattern = r'<img(.*?)src="(.*?)"(.*?)/>'

    # re.S lets the non-greedy capture span line breaks inside the div.
    content_html = re.findall(body_pattern, text, re.S)[0]
    content_rich = re.sub(r'<(?!img|br).*?>', '', content_html)
    # \g<2> is the captured src value; all other img attributes are dropped.
    content_text = re.sub(img_pattern, r'<img src="\g<2>" />', content_rich)
    return content_html, content_rich, content_text
def get_get_gzh_article_info(self, article):
content_url = article['content_url']
text = self.__get(content_url, 'mp.weixin.qq.com')

def get_gzh_article_info(self, article):
text = self.__get(article['content_url'], 'mp.weixin.qq.com')
yuan_url = re.findall('var msg_link = "(.*?)";', text)[0].replace('amp;','')
related = self.__deal_related(article)
comment = self.__deal_comment(text)
Expand Down

0 comments on commit 041008b

Please sign in to comment.