-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
333 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,28 @@ | ||
# deepdao-spider | ||
test | ||
|
||
--- | ||
|
||
# **deepdao数据爬虫+入库** | ||
|
||
## 代码结构 | ||
``` | ||
├── anal.py -> 将数据解析至本地txt | ||
├── main.py -> 爬虫主代码 | ||
├── todb.py -> 将本地txt数据入库 | ||
└── README.md | ||
``` | ||
|
||
## 依赖环境 | ||
- python 3.6.4 | ||
- requests | ||
- pymysql | ||
|
||
## 项目运行 | ||
1. 拉取本项目,进入项目目录。 | ||
2. 开始抓取。抓取过程中会在当前目录下生成几个解析的txt,便于备份数据以及校验。 | ||
```bash | ||
python3 main.py | ||
``` | ||
3. 等待抓取完成后,确认生成了6个txt文件(分别对应6个数据表),即可开始执行入库脚本。 | ||
```bash | ||
python3 todb.py | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import datetime | ||
import imp | ||
import json | ||
import time | ||
from pymysql.converters import escape_string | ||
|
||
platform_name = "deepdao" | ||
platform_url = "https://deepdao.io/" | ||
|
||
def anal_dao_list(daoId, daoName, dao_info):
    """Append one DAO summary row to ``./dao_list.txt``.

    Args:
        daoId: Value written to the ``organization_url`` column.
        daoName: Value written to the ``organization_name`` column.
        dao_info: Parsed response whose ``"data"`` mapping provides
            ``aum``, ``membersCount`` and ``proposalsCount``.

    The row is tab-separated and ends with the local snapshot timestamp
    and snapshot date.
    """
    summary = dao_info["data"]
    row = [
        platform_name,
        platform_url,
        daoName,
        daoId,
        str(summary["aum"]),
        str(summary["membersCount"]),
        str(summary["proposalsCount"]),
        # strftime() without an explicit time tuple formats the current local time.
        time.strftime("%Y-%m-%d %H:%M:%S"),
        time.strftime("%Y-%m-%d"),
    ]
    with open("./dao_list.txt", "a+", encoding="utf-8") as out:
        out.write("\t".join(row) + "\n")
|
||
|
||
def anal_dao_proposal_list(daoId, daoName, dao_proposals):
    """Append one row per proposal to ``./dao_proposal_list.txt``.

    Args:
        daoId: Value written to the ``organization_url`` column.
        daoName: Value written to the ``organization_name`` column.
        dao_proposals: Parsed response whose ``"decisions"`` list holds the
            proposals of the DAO.

    Title and description are SQL-escaped and stripped of tab characters so
    they cannot break the tab-separated layout.
    """
    for entry in dao_proposals["decisions"]:
        title = escape_string(entry["title"]).replace("\t", "")
        content = escape_string(entry["description"]).replace("\t", "")
        # The last five characters of createdAt are dropped before parsing
        # (presumably a ".000Z" suffix -- confirm against the API payload).
        started = datetime.datetime.strptime(entry["createdAt"][:-5], "%Y-%m-%dT%H:%M:%S")
        # The end time is not read from the payload; it is start + 3 days.
        ended = started + datetime.timedelta(days=3)
        row = [
            platform_name,
            platform_url,
            daoName,
            daoId,
            title,
            content,
            str(started),
            str(ended),
            str(entry["proposer"]),
            str(entry["status"]),
            str(entry["votes"]),
            str(entry["sharesFor"]),
            str(entry["sharesAgainst"]),
            time.strftime("%Y-%m-%d %H:%M:%S"),
            time.strftime("%Y-%m-%d"),
        ]
        with open("./dao_proposal_list.txt", "a+", encoding="utf-8") as out:
            out.write("\t".join(row) + "\n")
|
||
|
||
def anal_proposal_voter_list(daoId, daoName, proposal, proposal_voters):
    """Append one row per vote on ``proposal`` to ``./proposal_voter_list.txt``.

    Args:
        daoId: Value written to the ``organization_url`` column.
        daoName: Value written to the ``organization_name`` column.
        proposal: Proposal mapping providing ``title``, ``createdAt`` and
            ``proposer``.
        proposal_voters: Parsed GraphQL response; the votes are read from
            ``proposal_voters["data"]["votes"]``.

    Nothing is written (and the file is not created) when there are no votes.
    """
    votes = proposal_voters["data"]["votes"]
    if not votes:
        return

    # The proposal columns are identical for every vote, so compute them once.
    # Tabs are removed from the title, as in anal_dao_proposal_list, so a tab
    # inside a title cannot shift the columns of the tab-separated row.
    proposal_title = escape_string(proposal["title"]).replace("\t", "")
    proposal_start_time = datetime.datetime.strptime(proposal["createdAt"][:-5], "%Y-%m-%dT%H:%M:%S")
    # The end time is not read from the payload; it is start + 3 days.
    proposal_end_time = proposal_start_time + datetime.timedelta(days=3)
    proposal_columns = [
        platform_name,
        platform_url,
        daoName,
        daoId,
        proposal_title,
        str(proposal_start_time),
        str(proposal_end_time),
        str(proposal["proposer"]),
    ]

    with open("./proposal_voter_list.txt", "a+", encoding="utf-8") as f:
        for voter in votes:
            row = proposal_columns + [
                str(voter["voter"]),
                str(voter["choice"]),
                str(voter["vp"]),
                time.strftime("%Y-%m-%d %H:%M:%S"),
                time.strftime("%Y-%m-%d"),
            ]
            f.write("\t".join(row) + "\n")
|
||
|
||
def anal_dao_member(daoId, daoName, dao_members):
    """Append one row per DAO member to ``./dao_member.txt``.

    Args:
        daoId: Value written to the ``organization_url`` column.
        daoName: Value written to the ``organization_name`` column.
        dao_members: Parsed response whose ``"members"`` list holds the
            member records.
    """
    # Member fields in output-column order; every value is stringified.
    member_keys = (
        "name",
        "address",
        "tokenShares",
        "tokenSharesPercentage",
        "proposalsCount",
        "proposalsWonCount",
        "proposalsLostCount",
        "votesCount",
        "votesWonCount",
        "votesLostCount",
    )
    for member in dao_members["members"]:
        row = [platform_name, platform_url, daoName, daoId]
        row.extend([str(member[key]) for key in member_keys])
        row.append(time.strftime("%Y-%m-%d %H:%M:%S"))
        row.append(time.strftime("%Y-%m-%d"))
        with open("./dao_member.txt", "a+", encoding="utf-8") as out:
            out.write("\t".join(row) + "\n")
|
||
def anal_people_info(people_info,people_proposals, people_votes):
    """Append per-DAO activity rows for one person to ``./people_info.txt``.

    Args:
        people_info: Mapping providing ``name``, ``address``,
            ``participationScore``, ``daoAmount``, ``proposalsAmount`` and
            ``votesAmount``.
        people_proposals: Mapping whose values are lists of proposal records;
            each record provides ``daoName`` and optionally
            ``successfulVote``.
        people_votes: Mapping whose values are lists of vote records; each
            record provides ``daoName`` and optionally ``successful``.

    One row is written per DAO name seen in the proposals or votes. Nothing
    is written (and the file is not created) when both are empty.
    """
    people_organization = {}

    def tally_for(dao_name):
        # Create the counters on first sight of a DAO name, so records are
        # counted even if names differ within one group.
        return people_organization.setdefault(
            dao_name, {"votes": 0, "votes_win": 0, "proposals": 0, "proposals_win": 0}
        )

    for proposals in people_proposals.values():
        for proposal in proposals:
            tally = tally_for(proposal["daoName"])
            tally["proposals"] += 1
            # A missing flag counts as "not won" instead of being swallowed
            # by a blanket except.
            if proposal.get("successfulVote"):
                tally["proposals_win"] += 1
    for votes in people_votes.values():
        for vote in votes:
            tally = tally_for(vote["daoName"])
            tally["votes"] += 1
            if vote.get("successful"):
                tally["votes_win"] += 1

    if not people_organization:
        return

    # str() keeps the join from failing when name/address are null, matching
    # how anal_dao_member stringifies its fields.
    person_columns = [
        platform_name,
        platform_url,
        str(people_info["name"]),
        str(people_info["address"]),
        str(people_info["participationScore"]),
        str(people_info["daoAmount"]),
        str(people_info["proposalsAmount"]),
        str(people_info["votesAmount"]),
    ]
    with open("./people_info.txt", "a+", encoding="utf-8") as f:
        for dao_name, tally in people_organization.items():
            row = person_columns + [
                str(dao_name),
                str(tally["votes"]),
                str(tally["votes_win"]),
                str(tally["proposals"]),
                str(tally["proposals_win"]),
                time.strftime("%Y-%m-%d %H:%M:%S"),
                time.strftime("%Y-%m-%d"),
            ]
            f.write("\t".join(row) + "\n")
|
||
def anal_people_holds(people_list):
    """Append one row per token holding to ``./people_holds.txt``.

    Args:
        people_list: Iterable of person mappings providing ``name``,
            ``address`` and ``daos["tokens"]``; each token provides ``name``,
            ``symbol``, ``tokenAddress``, ``value``, ``usd`` and
            ``usdVolumePercent``.

    People without tokens produce no rows.
    """
    for people in people_list:
        tokens = people["daos"]["tokens"]
        if not tokens:
            continue
        # str() keeps the join from failing when name/address are null,
        # matching how anal_dao_member stringifies its fields.
        person_columns = [
            platform_name,
            platform_url,
            str(people["name"]),
            str(people["address"]),
        ]
        with open("./people_holds.txt", "a+", encoding="utf-8") as f:
            for token in tokens:
                row = person_columns + [
                    str(token["name"]),
                    str(token["symbol"]),
                    str(token["tokenAddress"]),
                    str(token["value"]),
                    str(token["usd"]),
                    str(token["usdVolumePercent"]),
                    time.strftime("%Y-%m-%d %H:%M:%S"),
                    time.strftime("%Y-%m-%d"),
                ]
                f.write("\t".join(row) + "\n")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
import json | ||
import requests | ||
import time | ||
import random | ||
import anal | ||
|
||
# Default (empty) request body passed along with every GET request.
payload = dict()

# Browser-like request headers sent to golden-gate-server.deepdao.io.
headers = {
    "authority": "golden-gate-server.deepdao.io",
    "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/99.0.4844.84 Safari/537.36"
    ),
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.9"
    ),
    "sec-fetch-site": "none",
    "sec-fetch-mode": "navigate",
    "sec-fetch-user": "?1",
    "sec-fetch-dest": "document",
    "accept-language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6",
}
|
||
|
||
def try_get(url, payload, headers, max_retries=None):
    """GET ``url`` and return the response body parsed as JSON, retrying on failure.

    Args:
        url: Address to request.
        payload: Request body forwarded as ``data``.
        headers: Request headers.
        max_retries: Maximum number of retries after the first failed
            attempt. ``None`` (the default) retries indefinitely.

    Returns:
        The decoded JSON document.

    Raises:
        requests.exceptions.RequestException: When ``max_retries`` is set and
            every attempt failed.
        ValueError: When the response body is not valid JSON.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.request("GET", url, headers=headers, data=payload, timeout=10)
        except requests.exceptions.RequestException as e:
            print(e)
            if max_retries is not None and attempt > max_retries:
                raise
            # Wait before retrying so an unreachable server is not hit in a
            # tight loop.
            time.sleep(random.randint(1, 3))
        else:
            # Pause between successful requests as well.
            time.sleep(random.randint(1, 3))
            return json.loads(response.text)
|
||
|
||
def get_proposal_voter(proposal_id, max_retries=None):
    """POST the Snapshot ``Votes`` GraphQL query for one proposal and return the parsed JSON.

    Args:
        proposal_id: Proposal identifier sent as the ``id`` query variable.
        max_retries: Maximum number of retries after the first failed
            attempt. ``None`` (the default) retries indefinitely.

    Returns:
        The decoded JSON response from ``https://hub.snapshot.org/graphql``.

    Raises:
        requests.exceptions.RequestException: When ``max_retries`` is set and
            every attempt failed.
        ValueError: When the response body is not valid JSON.
    """
    # Votes are requested ordered by voting power, descending, in one page.
    # NOTE(review): verify that the API accepts a page size of 100000;
    # otherwise this needs pagination through "skip".
    payload = json.dumps({
        "operationName": "Votes",
        "variables": {
            "id": proposal_id,
            "orderBy": "vp",
            "orderDirection": "desc",
            "first": 100000,
            "skip": 0
        },
        "query": "query Votes($id: String!, $first: Int, $skip: Int, $orderBy: String, $orderDirection: OrderDirection, $voter: String) {\n votes(\n first: $first\n skip: $skip\n where: {proposal: $id, vp_gt: 0, voter: $voter}\n orderBy: $orderBy\n orderDirection: $orderDirection\n ) {\n ipfs\n voter\n choice\n vp\n vp_by_strategy\n }\n}"
    })
    headers = {
        'authority': 'hub.snapshot.org',
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6',
        'content-type': 'application/json',
        'origin': 'https://snapshot.org',
        'referer': 'https://snapshot.org/',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
    }
    url = "https://hub.snapshot.org/graphql"
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.request("POST", url, headers=headers, data=payload, timeout=10)
        except requests.exceptions.RequestException as e:
            print(e)
            if max_retries is not None and attempt > max_retries:
                raise
            # Wait before retrying so an unreachable server is not hit in a
            # tight loop.
            time.sleep(random.randint(1, 3))
        else:
            # Pause between successful requests as well.
            time.sleep(random.randint(1, 3))
            return json.loads(response.text)
|
||
|
||
if __name__ == "__main__":
    organization_url = "https://golden-gate-server.deepdao.io/organization/ksdf3ksa-937slj3/{}"
    organization_dao_url = "https://golden-gate-server.deepdao.io/organization/ksdf3ksa-937slj3/{}/dao"
    decisions_url = "https://golden-gate-server.deepdao.io/organization/ksdf3ksa-937slj3/{}/governance/decisions"
    members_url = "https://golden-gate-server.deepdao.io/organization/ksdf3ksa-937slj3/{}/members"
    top_people_url = "https://golden-gate-server.deepdao.io/people/top?limit=50&offset={}&sortBy=participationScore"

    # Collect the DAO ids and rank them by treasury size (largest first).
    dao_list = try_get("https://golden-gate-server.deepdao.io/dashboard/ksdf3ksa-937slj3", payload, headers)
    sorted_daos = []
    for dao in dao_list["daosSummary"]:
        sorted_daos.append((float(dao["totalValueUSD"]) if dao["totalValueUSD"] else 0.0, dao["organizationId"], dao["daoName"]))
    sorted_daos.sort(reverse=True)

    # Fetch the basic info of the top 100 DAOs and write it for the dao_list table.
    for _, organization_id, dao_name in sorted_daos[:100]:
        dao_info = try_get(organization_url.format(organization_id), payload, headers)
        anal.anal_dao_list(organization_id, dao_name, dao_info)

    # Fetch all proposals of the top 10 DAOs and write them for the
    # dao_proposal_list table. The resolved DAO ids and the first DAO's
    # proposals are kept so later steps do not request them again.
    top_daos = sorted_daos[:10]
    dao_ids = {}
    top_dao_proposals = None
    for rank, (_, organization_id, dao_name) in enumerate(top_daos):
        dao_id = try_get(organization_dao_url.format(organization_id), payload, headers)["data"][0]["daoId"]
        dao_ids[organization_id] = dao_id
        dao_proposals = try_get(decisions_url.format(dao_id), payload, headers)
        anal.anal_dao_proposal_list(organization_id, dao_name, dao_proposals)
        if rank == 0:
            top_dao_proposals = dao_proposals

    # Fetch the votes of every proposal of the top 1 DAO and write them for
    # the dao_proposal_voter_list table. Skipped when no DAO was returned.
    if top_dao_proposals is not None:
        _, top_organization_id, top_dao_name = sorted_daos[0]
        for proposal in top_dao_proposals["decisions"]:
            proposal_voters = get_proposal_voter(proposal["id"])
            anal.anal_proposal_voter_list(top_organization_id, top_dao_name, proposal, proposal_voters)

    # Fetch the members of the top 10 DAOs and write them for the dao_member table.
    for _, organization_id, dao_name in top_daos:
        dao_members = try_get(members_url.format(dao_ids[organization_id]), payload, headers)
        anal.anal_dao_member(organization_id, dao_name, dao_members)

    # Fetch the top people (two pages of 50) once; both steps below use them.
    people_pages = [try_get(top_people_url.format(offset), payload, headers) for offset in (0, 50)]

    # Fetch per-person proposals and votes and write them for the people_info table.
    # NOTE(review): only the first 10 people of each page are processed here
    # (20 in total), although this step was described as covering the top
    # 100 people -- confirm whether the [:10] limit is intentional.
    for people_list in people_pages:
        for people in people_list[:10]:
            people_proposals = try_get("https://golden-gate-server.deepdao.io/user/2/{}/proposals".format(people["address"]), payload, headers)
            people_votes = try_get("https://golden-gate-server.deepdao.io/user/2/{}/votes".format(people["address"]), payload, headers)
            anal.anal_people_info(people, people_proposals, people_votes)

    # Write the token holdings of the top 100 people for the people_holds table.
    for people_list in people_pages:
        anal.anal_people_holds(people_list)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import os

import pymysql
from pymysql.converters import escape_string
|
||
# Database connection settings.
# Each value can be overridden through an environment variable; the literals
# are the previously hard-coded values, kept as fallbacks so existing runs
# behave the same.
# NOTE(review): the fallback credentials are committed to the repository --
# rotate them and supply real values through the environment.
db = pymysql.connect(
    host=os.environ.get("DEEPDAO_DB_HOST", "119.91.192.183"),
    port=int(os.environ.get("DEEPDAO_DB_PORT", "3306")),
    database=os.environ.get("DEEPDAO_DB_NAME", "deepdao"),
    user=os.environ.get("DEEPDAO_DB_USER", "debian-sys-maint"),
    password=os.environ.get("DEEPDAO_DB_PASSWORD", "123456"),
    charset='utf8mb4'
)
cursor = db.cursor()
|
||
# Send a SQL statement to the database and execute it.
def sqltodb(sql_str):
    """Execute ``sql_str`` on the module-level cursor and commit.

    Args:
        sql_str: SQL statement, optionally ending in the trailing comma left
            by a generated ``VALUES (...),(...),`` list.

    Failures are printed together with the statement and the transaction is
    rolled back; no exception is propagated to the caller.
    """
    # Drop only a trailing comma (and surrounding whitespace) rather than
    # blindly removing the last character, then terminate the statement.
    sql_str = sql_str.rstrip().rstrip(",") + ";"
    try:
        cursor.execute(sql_str)
        db.commit()
    except Exception as e:
        print("sql install error", e)
        print(sql_str)
        # Discard the failed statement's transaction so later statements
        # start clean. The rollback is best-effort: a broken connection
        # must not turn this logged failure into a crash.
        try:
            db.rollback()
        except Exception as rollback_error:
            print("sql rollback error", rollback_error)
    else:
        print("********sql上传成功*********")
|
||
# Read a txt file and turn its fields into one SQL INSERT statement.
def txt_db(file,sql_str):
    """Build a multi-row INSERT from a tab-separated file and execute it.

    Args:
        file: Path of the UTF-8, tab-separated text file; one row per line.
        sql_str: Statement prefix ending in ``VALUES ``; one
            ``(null, 'field', ...)`` group is appended per non-empty line.

    The statement is handed to ``sqltodb``. Nothing is executed when the
    file contains no rows.
    """
    value_groups = []
    # The context manager closes the file even if a line fails to parse.
    with open(file, encoding="utf-8") as source:
        for line in source:
            # Strip only the line terminator, so a final line without a
            # newline keeps its last character.
            line = line.rstrip("\n")
            if not line:
                # A blank line would yield a group with the wrong column count.
                continue
            fields = "".join(", '%s'" % (escape_string(obj)) for obj in line.split("\t"))
            value_groups.append("(null" + fields + "),")

    if not value_groups:
        print("no rows found in %s, nothing to insert" % file)
        return
    # The trailing comma of the last group is removed by sqltodb.
    sqltodb(sql_str + "".join(value_groups))
|
||
|
||
if __name__ == "__main__":
    # Load the six txt files, each with its own INSERT statement prefix.
    import_jobs = (
        (
            "./dao_list.txt",
            "INSERT INTO dao_list (id, platform_name, platform_url, organization_name, organization_url, treasury_count, members_num, proposals_num, snapshot_time, snapshot_date) VALUES ",
        ),
        (
            "./dao_member.txt",
            "INSERT INTO dao_member (id, platform_name, platform_url, organization_name, organization_url, member_username, member_address, member_tokens_share, member_share_percent, member_proposals_created_num, member_proposals_win_percent, member_proposals_lost_percent, member_voted_num, member_voted_win_percent, member_voted_lost_percent, snapshot_time, snapshot_date) VALUES ",
        ),
        (
            "./dao_proposal_list.txt",
            "INSERT INTO dao_proposal_list (id, platform_name, platform_url, organization_name, organization_url, proposal_title, proposal_content, proposal_start_time, proposal_end_time, proposal_proposed_name, proposal_outcome, proposal_votes_num, proposal_for_num, proposal_against_num, snapshot_time, snapshot_date) VALUES ",
        ),
        (
            "./proposal_voter_list.txt",
            "INSERT INTO dao_proposal_voter_list (id, platform_name, platform_url, organization_name, organization_url, proposal_title, proposal_start_time, proposal_end_time, proposal_proposed_name, voter_name, voter_opinion, voter_count, snapshot_time, snapshot_date) VALUES ",
        ),
        (
            "./people_holds.txt",
            "INSERT INTO people_holds (id, platform_name, platform_url, people_username, people_address, people_token_name, people_token_symbol, people_token_address, people_token_value, people_token_usd, people_token_usd_percent, snapshot_time, snapshot_date) VALUES ",
        ),
        (
            "./people_info.txt",
            "INSERT INTO people_info (id, platform_name, platform_url, people_username, people_address, people_participation_score, people_dao_num, people_proposals_created, people_voted_num, people_organization_name, people_organization_votes, people_organization_voted_win, people_organization_proposals, people_organization_proposals_win, snapshot_time, snapshot_date) VALUES ",
        ),
    )
    for txt_path, insert_prefix in import_jobs:
        txt_db(txt_path, insert_prefix)
|