Skip to content

Commit

Permalink
登录模块
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ehco1996 committed Feb 14, 2018
1 parent da5bebf commit cc1dddb
Show file tree
Hide file tree
Showing 11 changed files with 670 additions and 105 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,4 @@ Beautiful Soup 爬虫/.vscode/settings.json
*.settings.json
toapi-91baby/.vscode/settings.json
mazhifu/.vscode/settings.json
*.json
File renamed without changes.
569 changes: 569 additions & 0 deletions zhihu/zhihu_easy/client.py

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions zhihu/zhihu_easy/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import json
import os

BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def parse_activites(file_path):
    '''
    Parse one saved JSON page of user activity data.

    The file is expected to hold a JSON object whose ``data`` entry is a
    list of activity dicts.  Only three verbs are kept:

    * ``ANSWER_VOTE_UP`` / ``ANSWER_CREATE`` - question and answer fields
      are both taken from ``target``.
    * ``QUESTION_FOLLOW`` - ``target`` is the question itself, so the
      answer fields are set to empty strings.

    Every other verb is skipped.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        list of dicts with the keys ``question_id``, ``question_name``,
        ``question_api_url``, ``answer_id``, ``answer_api_url`` and
        ``answer_content``.  An empty list is returned when the file has
        no ``data`` entry (or it is null).

    Raises:
        KeyError: if a kept activity lacks one of the fields read below.
    '''
    # NOTE: the file is opened with the platform default encoding, which is
    # also how the downloader in this project writes these files.
    with open(file_path) as f:
        # A page without a usable 'data' list (e.g. a saved error payload)
        # yields no activities instead of failing on iteration over None.
        data = json.load(f).get('data') or []

    res = []
    for action in data:
        verb = action['verb']
        if verb in ('ANSWER_VOTE_UP', 'ANSWER_CREATE'):
            # Upvoting or writing an answer: target is the answer and
            # carries its question nested inside.
            answer = action['target']
            question = answer['question']
            record = {
                'question_id': question['id'],
                'question_name': question['title'],
                'question_api_url': question['url'],
                'answer_id': answer['id'],
                'answer_api_url': answer['url'],
                'answer_content': answer['excerpt'],
            }
        elif verb == 'QUESTION_FOLLOW':
            # Following a question: target is the question, no answer.
            question = action['target']
            record = {
                'question_id': question['id'],
                'question_name': question['title'],
                'question_api_url': question['url'],
                'answer_id': '',
                'answer_api_url': '',
                'answer_content': '',
            }
        else:
            continue
        res.append(record)
    return res


# Parse every file found in the data directory next to this module and
# print each extracted field as "key value".
data_dir = BASE_DIR+'/data/'
for entry_name in os.listdir(data_dir):
    records = parse_activites(data_dir+entry_name)
    for record in records:
        for field in record:
            print(field, record[field])
34 changes: 34 additions & 0 deletions zhihu/zhihu_easy/spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import json
import time
import os

from client import ZhihuClient

BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def download_activs_json(s, url, count=1):
    '''
    Download a user's activity feed page by page and save each page as JSON.

    Each page is fetched with ``s.get(url).json()`` and written to
    ``BASE_DIR/data/<count>.json``.  The next page URL is read from
    ``paging.next`` and the loop stops once ``paging.is_end`` is true.

    Pages are walked with a loop rather than one recursive call per page,
    so a long history cannot hit the interpreter's recursion limit and
    earlier pages are not kept alive on the call stack.

    Args:
        s: session-like object; ``s.get(url)`` must return an object with
            a ``.json()`` method.
        url: URL of the first page to download.
        count: number used in the first output file name; it is increased
            by one for every page.
    '''
    while True:
        res = s.get(url).json()
        with open(BASE_DIR+'/data/{}.json'.format(count), 'w') as f:
            f.write(json.dumps(res, ensure_ascii=False))
        print('正在下载第{}份动态'.format(count))
        count += 1
        # Pause after every page so requests are spaced three seconds apart.
        time.sleep(3)
        if res['paging']['is_end']:
            break
        url = res['paging']['next']
    print('所有动态下载完毕')


# Log in to Zhihu and take the session object from the client
s = ZhihuClient('', '').get_session()
# Add the authorization header to the session (the value is empty in this file)
auth_header = {'authorization': ''}
s.headers.update(auth_header)
# URL of the first activities page to download
start_url = (
    'https://www.zhihu.com/api/v4/members/Ehcostuff/activities'
    '?limit=8&after_id=1518305424&desktop=True'
)
download_activs_json(s, start_url)
14 changes: 14 additions & 0 deletions zhihu/zhihu_easy/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import shutil

import requests


def get_image(url, path):
    '''
    Download the resource at ``url`` and write its bytes to ``path``.

    The body is streamed to disk instead of being loaded into memory.

    Args:
        url: address of the image to fetch.
        path: destination file path; it is opened in binary write mode.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status,
            so an error page is never saved as if it were the image.
    '''
    # The context manager releases the streamed connection when done.
    with requests.get(url, stream=True) as res:
        res.raise_for_status()
        # res.raw is the undecoded stream; ask it to undo gzip/deflate
        # transfer compression so the file holds the real image bytes.
        res.raw.decode_content = True
        with open(path, 'wb') as f:
            shutil.copyfileobj(res.raw, f)


def save_html(text, name, encoding='utf-8'):
    '''
    Write ``text`` to the file ``name``, replacing any existing content.

    Args:
        text: the HTML (or any other string) to save.
        name: path of the file to write.
        encoding: text encoding used for the file.  Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding,
            which may be unable to represent every character in the page.
    '''
    with open(name, 'w', encoding=encoding) as f:
        f.write(text)
3 changes: 0 additions & 3 deletions zhihu/zhihu_hard/.vscode/settings.json

This file was deleted.

12 changes: 0 additions & 12 deletions zhihu/zhihu_hard/src/configs.py

This file was deleted.

19 changes: 0 additions & 19 deletions zhihu/zhihu_hard/src/parse.py

This file was deleted.

30 changes: 0 additions & 30 deletions zhihu/zhihu_hard/src/spider.py

This file was deleted.

41 changes: 0 additions & 41 deletions zhihu/zhihu_hard/src/tools.py

This file was deleted.

0 comments on commit cc1dddb

Please sign in to comment.