Skip to content

Commit

Permalink
add code:v1
Browse files Browse the repository at this point in the history
  • Loading branch information
FTLIKON committed Apr 13, 2022
1 parent 0bea3a8 commit d70ebbb
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 2 deletions.
30 changes: 28 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,28 @@
# deepdao-spider
test

---

# **deepdao数据爬虫+入库**

## 代码结构
```
├── anal.py -> 将数据解析至本地txt
├── main.py -> 爬虫主代码
├── todb.py -> 将本地txt数据入库
└── README.md
```

## 依赖环境
- python 3.6.4
- requests
- pymysql

## 项目运行
1. 拉取本项目,进入项目目录。
2. 开始抓取。抓取过程中会在当前目录下生成几个解析的txt,便于备份数据以及校验。
```bash
python3 main.py
```
3. 等待抓取完成后,确认生成了6个txt文件(分别对应6个数据表),即可开始执行入库脚本。
```bash
python3 todb.py
```
141 changes: 141 additions & 0 deletions anal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import datetime
import imp
import json
import time
from pymysql.converters import escape_string

# Constant source-identification columns: every anal_* function below writes
# these two values as the first two fields of each row it emits.
platform_name = "deepdao"
platform_url = "https://deepdao.io/"

def anal_dao_list(daoId, daoName, dao_info):
    """Append one DAO summary row to ``./dao_list.txt``.

    The row is tab-separated and newline-terminated. Columns, in order:
    platform name, platform URL, organization name, organization id,
    ``aum``, ``membersCount``, ``proposalsCount``, snapshot time and
    snapshot date.

    Args:
        daoId: Organization identifier, written to the organization_url column.
        daoName: Organization display name.
        dao_info: Decoded response whose ``"data"`` mapping provides
            ``aum``, ``membersCount`` and ``proposalsCount``.

    Raises:
        KeyError: If ``dao_info`` lacks one of the keys listed above.
    """
    data = dao_info["data"]
    # Read the clock once so the time and date columns can never disagree
    # (two separate reads could straddle midnight).
    now = time.localtime()
    row = [
        platform_name,
        platform_url,
        # str() keeps join() from raising when the name or id is not a string.
        str(daoName),
        str(daoId),
        str(data["aum"]),
        str(data["membersCount"]),
        str(data["proposalsCount"]),
        time.strftime("%Y-%m-%d %H:%M:%S", now),
        time.strftime("%Y-%m-%d", now),
    ]
    with open("./dao_list.txt", "a+", encoding="utf-8") as f:
        f.write('\t'.join(row) + '\n')


def anal_dao_proposal_list(daoId, daoName, dao_proposals):
    """Append one row per proposal to ``./dao_proposal_list.txt``.

    Each row is tab-separated and newline-terminated. Title and description
    are passed through ``escape_string`` and stripped of tab characters so
    they cannot break the column layout. The end time is written as the
    creation time plus three days.

    Args:
        daoId: Organization identifier, written to the organization_url column.
        daoName: Organization display name.
        dao_proposals: Decoded response whose ``"decisions"`` entry is the
            list of proposal mappings.

    Raises:
        KeyError: If a proposal lacks one of the fields read below.
        ValueError: If ``createdAt`` (minus its last five characters) does
            not match ``%Y-%m-%dT%H:%M:%S``.
    """
    decisions = dao_proposals["decisions"]
    if not decisions:
        # Nothing to write; do not create an empty output file.
        return

    # One clock read per call keeps the time/date columns consistent.
    now = time.localtime()
    snapshot_time = time.strftime("%Y-%m-%d %H:%M:%S", now)
    snapshot_date = time.strftime("%Y-%m-%d", now)
    organization_name = str(daoName)
    organization_url = str(daoId)

    # Open the file once for the whole batch instead of once per record.
    # Rows already written are still flushed if a later record raises.
    with open("./dao_proposal_list.txt", "a+", encoding="utf-8") as f:
        for proposal in decisions:
            title = escape_string(proposal["title"]).replace("\t", "")
            content = escape_string(proposal["description"]).replace("\t", "")
            # Drop the trailing five characters before parsing
            # (presumably a ".000Z" suffix; verify against the API).
            start_time = datetime.datetime.strptime(
                proposal["createdAt"][:-5], "%Y-%m-%dT%H:%M:%S"
            )
            end_time = start_time + datetime.timedelta(days=3)
            f.write('\t'.join([
                platform_name,
                platform_url,
                organization_name,
                organization_url,
                title,
                content,
                str(start_time),
                str(end_time),
                str(proposal["proposer"]),
                str(proposal["status"]),
                str(proposal["votes"]),
                str(proposal["sharesFor"]),
                str(proposal["sharesAgainst"]),
                snapshot_time,
                snapshot_date,
            ]) + '\n')


def anal_proposal_voter_list(daoId, daoName, proposal, proposal_voters):
    """Append one row per vote on ``proposal`` to ``./proposal_voter_list.txt``.

    Each row is tab-separated and newline-terminated and repeats the
    proposal's title, start time, end time (start plus three days) and
    proposer, followed by the voter, their choice and their ``vp`` value.

    Args:
        daoId: Organization identifier, written to the organization_url column.
        daoName: Organization display name.
        proposal: Proposal mapping providing ``title``, ``createdAt`` and
            ``proposer``.
        proposal_voters: Decoded response whose ``["data"]["votes"]`` entry
            is the list of vote mappings.

    Raises:
        KeyError: If the proposal or a vote lacks one of the fields read below.
        ValueError: If ``createdAt`` (minus its last five characters) does
            not match ``%Y-%m-%dT%H:%M:%S``.
    """
    votes = proposal_voters["data"]["votes"]
    if not votes:
        # Nothing to write; do not create an empty output file.
        return

    # Proposal-level columns are identical for every vote: compute them once.
    # Tabs are stripped from the title, as anal_dao_proposal_list does, so a
    # tab cannot shift the columns and titles match between the two files.
    proposal_title = escape_string(proposal["title"]).replace("\t", "")
    start_time = datetime.datetime.strptime(
        proposal["createdAt"][:-5], "%Y-%m-%dT%H:%M:%S"
    )
    end_time = start_time + datetime.timedelta(days=3)
    leading_columns = [
        platform_name,
        platform_url,
        str(daoName),
        str(daoId),
        proposal_title,
        str(start_time),
        str(end_time),
        str(proposal["proposer"]),
    ]

    # One clock read per call keeps the time/date columns consistent.
    now = time.localtime()
    trailing_columns = [
        time.strftime("%Y-%m-%d %H:%M:%S", now),
        time.strftime("%Y-%m-%d", now),
    ]

    # Open the file once for the whole batch instead of once per vote.
    with open("./proposal_voter_list.txt", "a+", encoding="utf-8") as f:
        for voter in votes:
            vote_columns = [
                str(voter["voter"]),
                str(voter["choice"]),
                str(voter["vp"]),
            ]
            f.write('\t'.join(leading_columns + vote_columns + trailing_columns) + '\n')


def anal_dao_member(daoId, daoName, dao_members):
    """Append one row per member to ``./dao_member.txt``.

    Each row is tab-separated and newline-terminated.

    Args:
        daoId: Organization identifier, written to the organization_url column.
        daoName: Organization display name.
        dao_members: Decoded response whose ``"members"`` entry is the list
            of member mappings.

    Raises:
        KeyError: If a member lacks one of the fields listed in
            ``member_keys`` below.
    """
    members = dao_members["members"]
    if not members:
        # Nothing to write; do not create an empty output file.
        return

    # Member fields in output-column order.
    # NOTE(review): the won/lost *Count* values land in the columns that
    # todb.py names *_win_percent / *_lost_percent -- confirm this is intended.
    member_keys = (
        "name",
        "address",
        "tokenShares",
        "tokenSharesPercentage",
        "proposalsCount",
        "proposalsWonCount",
        "proposalsLostCount",
        "votesCount",
        "votesWonCount",
        "votesLostCount",
    )

    # One clock read per call keeps the time/date columns consistent.
    now = time.localtime()
    snapshot_columns = [
        time.strftime("%Y-%m-%d %H:%M:%S", now),
        time.strftime("%Y-%m-%d", now),
    ]
    leading_columns = [platform_name, platform_url, str(daoName), str(daoId)]

    # Open the file once for the whole batch instead of once per member.
    with open("./dao_member.txt", "a+", encoding="utf-8") as f:
        for member in members:
            member_columns = [str(member[key]) for key in member_keys]
            f.write('\t'.join(leading_columns + member_columns + snapshot_columns) + '\n')

def anal_people_info(people_info, people_proposals, people_votes):
    """Append per-DAO activity rows for one person to ``./people_info.txt``.

    Proposals and votes are tallied per ``daoName``. One tab-separated,
    newline-terminated row is written for every DAO that appears in either
    input; each row repeats the person's global columns and then the DAO's
    vote count, winning votes, proposal count and winning proposals.

    Args:
        people_info: Mapping providing ``name``, ``address``,
            ``participationScore``, ``daoAmount``, ``proposalsAmount`` and
            ``votesAmount``.
        people_proposals: Mapping of DAO id to a list of proposal mappings.
            Each has ``daoName`` and optionally ``successfulVote``.
        people_votes: Mapping of DAO id to a list of vote mappings. Each
            has ``daoName`` and optionally ``successful``.

    Raises:
        KeyError: If ``people_info`` lacks a field, or an entry lacks ``daoName``.
    """
    def blank_tally():
        return {"votes": 0, "votes_win": 0, "proposals": 0, "proposals_win": 0}

    # Insertion order (proposal DAOs first, then vote-only DAOs) defines the
    # order of the output rows.
    people_organization = {}
    for proposals in people_proposals.values():
        for proposal in proposals:
            tally = people_organization.setdefault(proposal["daoName"], blank_tally())
            tally["proposals"] += 1
            # A missing flag counts as "not won"; .get() replaces a blanket
            # try/except that would also have hidden unrelated errors.
            if proposal.get("successfulVote"):
                tally["proposals_win"] += 1
    for votes in people_votes.values():
        for vote in votes:
            tally = people_organization.setdefault(vote["daoName"], blank_tally())
            tally["votes"] += 1
            if vote.get("successful"):
                tally["votes_win"] += 1

    person_columns = [
        platform_name,
        platform_url,
        # str() keeps join() from raising when name or address is not a
        # string, matching how anal_dao_member treats the same fields.
        str(people_info["name"]),
        str(people_info["address"]),
        str(people_info["participationScore"]),
        str(people_info["daoAmount"]),
        str(people_info["proposalsAmount"]),
        str(people_info["votesAmount"]),
    ]

    # One clock read per call keeps the time/date columns consistent.
    now = time.localtime()
    snapshot_columns = [
        time.strftime("%Y-%m-%d %H:%M:%S", now),
        time.strftime("%Y-%m-%d", now),
    ]

    lines = []
    for dao_name, tally in people_organization.items():
        dao_columns = [
            dao_name,
            str(tally["votes"]),
            str(tally["votes_win"]),
            str(tally["proposals"]),
            str(tally["proposals_win"]),
        ]
        lines.append('\t'.join(person_columns + dao_columns + snapshot_columns) + '\n')

    if lines:
        # Single open/write for the whole batch; no file is created when
        # the person has no recorded activity.
        with open("./people_info.txt", "a+", encoding="utf-8") as f:
            f.writelines(lines)

def anal_people_holds(people_list):
    """Append one row per token holding to ``./people_holds.txt``.

    For every person in ``people_list``, each entry of
    ``person["daos"]["tokens"]`` becomes one tab-separated,
    newline-terminated row.

    Args:
        people_list: Iterable of person mappings providing ``name``,
            ``address`` and ``daos`` (a mapping with a ``tokens`` list).

    Raises:
        KeyError: If a person or token lacks one of the fields read below.
    """
    # Token fields in output-column order.
    token_keys = ("name", "symbol", "tokenAddress", "value", "usd", "usdVolumePercent")

    # One clock read per call keeps the time/date columns consistent.
    now = time.localtime()
    snapshot_columns = [
        time.strftime("%Y-%m-%d %H:%M:%S", now),
        time.strftime("%Y-%m-%d", now),
    ]

    for people in people_list:
        person_columns = [
            platform_name,
            platform_url,
            # str() keeps join() from raising when name or address is not a
            # string (e.g. a null name).
            str(people["name"]),
            str(people["address"]),
        ]
        tokens = people["daos"]["tokens"]
        if not tokens:
            continue
        # Open once per person rather than once per token. Rows already
        # written are still flushed if a later token raises.
        with open("./people_holds.txt", "a+", encoding="utf-8") as f:
            for token in tokens:
                token_columns = [str(token[key]) for key in token_keys]
                f.write('\t'.join(person_columns + token_columns + snapshot_columns) + '\n')

117 changes: 117 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import json
import requests
import time
import random
import anal

# Default request body (empty) and browser-like request headers; the script
# section of this file passes both to try_get for every
# golden-gate-server.deepdao.io request.
payload = {}
headers = {
'authority': 'golden-gate-server.deepdao.io',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'sec-fetch-site': 'none',
'sec-fetch-mode': 'navigate',
'sec-fetch-user': '?1',
'sec-fetch-dest': 'document',
'accept-language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6'
}


def try_get(url, payload, headers, max_attempts=None):
    """GET ``url`` and return its JSON-decoded body, retrying on failure.

    Every attempt, successful or not, is followed by a random pause of
    1-3 seconds, so a failing endpoint is never polled in a tight loop.
    A body that is not valid JSON is treated like a network failure and
    retried, rather than aborting the whole crawl.

    Args:
        url: Address to request.
        payload: Request body, passed as ``data``.
        headers: Request headers.
        max_attempts: Optional cap on the number of attempts. ``None``
            (the default) retries indefinitely, as callers expect.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: The last error, once ``max_attempts`` attempts have failed.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.request("GET", url, headers=headers, data=payload, timeout=10)
            # Decode inside the try so an error page or truncated response
            # triggers a retry instead of escaping the loop.
            result = json.loads(response.text)
        except Exception as e:
            print(e)
            if max_attempts is not None and attempt >= max_attempts:
                raise
            # Back off before retrying so failures do not hammer the server.
            time.sleep(random.randint(1, 3))
        else:
            time.sleep(random.randint(1, 3))
            return result


def get_proposal_voter(proposal_id, max_attempts=None):
    """Fetch the votes of one proposal from the hub.snapshot.org GraphQL API.

    Sends the ``Votes`` query for ``proposal_id``, ordered by ``vp``
    descending. Every attempt is followed by a random pause of 1-3 seconds.
    Network failures and non-JSON bodies are both retried.

    Args:
        proposal_id: Proposal identifier used as the query's ``id`` variable.
        max_attempts: Optional cap on the number of attempts. ``None``
            (the default) retries indefinitely.

    Returns:
        The decoded JSON response.

    Raises:
        Exception: The last error, once ``max_attempts`` attempts have failed.
    """
    payload = json.dumps({
        "operationName": "Votes",
        "variables": {
            "id": proposal_id,
            "orderBy": "vp",
            "orderDirection": "desc",
            "first": 100000,
            "skip": 0
        },
        "query": "query Votes($id: String!, $first: Int, $skip: Int, $orderBy: String, $orderDirection: OrderDirection, $voter: String) {\n votes(\n first: $first\n skip: $skip\n where: {proposal: $id, vp_gt: 0, voter: $voter}\n orderBy: $orderBy\n orderDirection: $orderDirection\n ) {\n ipfs\n voter\n choice\n vp\n vp_by_strategy\n }\n}"
    })
    headers = {
        'authority': 'hub.snapshot.org',
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6',
        'content-type': 'application/json',
        'origin': 'https://snapshot.org',
        'referer': 'https://snapshot.org/',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
    }
    url = "https://hub.snapshot.org/graphql"
    attempt = 0
    while True:
        attempt += 1
        try:
            response = requests.request("POST", url, headers=headers, data=payload, timeout=10)
            # Decode inside the try so an error page or truncated response
            # triggers a retry instead of escaping the loop.
            result = json.loads(response.text)
        except Exception as e:
            print(e)
            if max_attempts is not None and attempt >= max_attempts:
                raise
            # Back off before retrying so failures do not hammer the server.
            time.sleep(random.randint(1, 3))
        else:
            time.sleep(random.randint(1, 3))
            return result


if __name__ == "__main__":
    api_root = "https://golden-gate-server.deepdao.io"
    org_root = api_root + "/organization/ksdf3ksa-937slj3"

    # Collect every DAO and rank them by treasury value, largest first.
    dao_list = try_get(api_root + "/dashboard/ksdf3ksa-937slj3", payload, headers)
    sorted_daos = [
        (
            float(dao["totalValueUSD"]) if dao["totalValueUSD"] else 0.0,
            dao["organizationId"],
            dao["daoName"],
        )
        for dao in dao_list["daosSummary"]
    ]
    sorted_daos.sort(reverse=True)

    # Top 100 DAOs: basic information -> dao_list table.
    for _, org_id, dao_name in sorted_daos[:100]:
        dao_info = try_get("{}/{}".format(org_root, org_id), payload, headers)
        anal.anal_dao_list(org_id, dao_name, dao_info)

    # Top 10 DAOs: all proposals -> dao_proposal_list table.
    # The resolved daoId and the proposal list are kept so the later
    # sections do not have to request the same resources again.
    dao_ids = {}
    proposals_by_org = {}
    for _, org_id, dao_name in sorted_daos[:10]:
        dao_ids[org_id] = try_get("{}/{}/dao".format(org_root, org_id), payload, headers)["data"][0]["daoId"]
        proposals_by_org[org_id] = try_get(
            "{}/{}/governance/decisions".format(org_root, dao_ids[org_id]), payload, headers
        )
        anal.anal_dao_proposal_list(org_id, dao_name, proposals_by_org[org_id])

    # Top 1 DAO: votes of every proposal -> dao_proposal_voter_list table.
    # Guarded so an empty dashboard does not raise IndexError.
    if sorted_daos:
        _, top_org_id, top_dao_name = sorted_daos[0]
        for proposal in proposals_by_org[top_org_id]["decisions"]:
            proposal_voters = get_proposal_voter(proposal["id"])
            anal.anal_proposal_voter_list(top_org_id, top_dao_name, proposal, proposal_voters)

    # Top 10 DAOs: member information -> dao_member table.
    for _, org_id, dao_name in sorted_daos[:10]:
        dao_members = try_get("{}/{}/members".format(org_root, dao_ids[org_id]), payload, headers)
        anal.anal_dao_member(org_id, dao_name, dao_members)

    # Top people by participation score, fetched once as two pages of 50
    # and shared by the two sections below.
    people_pages = [
        try_get(
            api_root + "/people/top?limit=50&offset={}&sortBy=participationScore".format(offset),
            payload,
            headers,
        )
        for offset in (0, 50)
    ]

    # People activity -> people_info table.
    # NOTE(review): only the first 10 people of each page are processed
    # (20 in total), although the section is labelled "top 100" -- confirm
    # whether the [:10] limit is intentional.
    for people_list in people_pages:
        for people in people_list[:10]:
            people_proposals = try_get(
                api_root + "/user/2/{}/proposals".format(people["address"]), payload, headers
            )
            people_votes = try_get(
                api_root + "/user/2/{}/votes".format(people["address"]), payload, headers
            )
            anal.anal_people_info(people, people_proposals, people_votes)

    # People token holdings -> people_holds table.
    for people_list in people_pages:
        anal.anal_people_holds(people_list)

47 changes: 47 additions & 0 deletions todb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pymysql
from pymysql.converters import escape_string

# Database connection settings
import os

# Each setting can be overridden through an environment variable so that
# credentials need not live in source control.
# NOTE(review): the defaults are the values that were previously hard-coded
# here; they are committed to the repository and should be rotated and
# removed once the environment variables are in place.
db = pymysql.connect(
    host=os.environ.get("DEEPDAO_DB_HOST", "119.91.192.183"),
    port=int(os.environ.get("DEEPDAO_DB_PORT", "3306")),
    database=os.environ.get("DEEPDAO_DB_NAME", "deepdao"),
    user=os.environ.get("DEEPDAO_DB_USER", "debian-sys-maint"),
    password=os.environ.get("DEEPDAO_DB_PASSWORD", "123456"),
    charset='utf8mb4'
)
cursor = db.cursor()

# Send a SQL statement to the database and commit it
def sqltodb(sql_str):
    """Execute ``sql_str`` on the module-level cursor and commit.

    The last character of ``sql_str`` is replaced by ``;`` before execution
    (txt_db passes statements that end with a trailing comma). Failures are
    reported on stdout together with the statement; nothing is raised.
    """
    statement = "{};".format(sql_str[:-1])
    try:
        cursor.execute(statement)
        db.commit()
    except Exception as err:
        print("sql install error", err)
        print(statement)
        return
    print("********sql上传成功*********")

# Read a txt export and turn its rows into one multi-row INSERT statement
def txt_db(file, sql_str):
    """Load a tab-separated export and insert all of its rows.

    Every line of ``file`` becomes one ``(null, 'v1', 'v2', ...)`` tuple,
    with each field passed through ``escape_string``. The tuples are
    appended to ``sql_str`` and the statement is handed to ``sqltodb``.

    Args:
        file: Path of the UTF-8, tab-separated text file to load.
        sql_str: Statement prefix, ending with ``VALUES ``.

    Raises:
        OSError: If ``file`` cannot be opened.
    """
    rows = []
    # The context manager guarantees the file handle is closed.
    with open(file, encoding="utf-8") as f:
        for line in f:
            # Strip only the line terminator; slicing off the last character
            # would eat real data when the final line has no newline.
            fields = line.rstrip("\n").split("\t")
            values = "".join(", '%s'" % escape_string(field) for field in fields)
            rows.append("(null" + values + ")")

    if not rows:
        # An empty file would otherwise produce an invalid "VALUES;" statement.
        print("no rows to insert from", file)
        return

    # sqltodb replaces the final character with ';', so keep the trailing
    # comma it expects.
    sqltodb(sql_str + ",".join(rows) + ",")


if __name__ == "__main__":

    # Load the six txt exports, each with the INSERT prefix of its own table
    import_jobs = (
        ("./dao_list.txt", "INSERT INTO dao_list (id, platform_name, platform_url, organization_name, organization_url, treasury_count, members_num, proposals_num, snapshot_time, snapshot_date) VALUES "),
        ("./dao_member.txt", "INSERT INTO dao_member (id, platform_name, platform_url, organization_name, organization_url, member_username, member_address, member_tokens_share, member_share_percent, member_proposals_created_num, member_proposals_win_percent, member_proposals_lost_percent, member_voted_num, member_voted_win_percent, member_voted_lost_percent, snapshot_time, snapshot_date) VALUES "),
        ("./dao_proposal_list.txt", "INSERT INTO dao_proposal_list (id, platform_name, platform_url, organization_name, organization_url, proposal_title, proposal_content, proposal_start_time, proposal_end_time, proposal_proposed_name, proposal_outcome, proposal_votes_num, proposal_for_num, proposal_against_num, snapshot_time, snapshot_date) VALUES "),
        ("./proposal_voter_list.txt", "INSERT INTO dao_proposal_voter_list (id, platform_name, platform_url, organization_name, organization_url, proposal_title, proposal_start_time, proposal_end_time, proposal_proposed_name, voter_name, voter_opinion, voter_count, snapshot_time, snapshot_date) VALUES "),
        ("./people_holds.txt", "INSERT INTO people_holds (id, platform_name, platform_url, people_username, people_address, people_token_name, people_token_symbol, people_token_address, people_token_value, people_token_usd, people_token_usd_percent, snapshot_time, snapshot_date) VALUES "),
        ("./people_info.txt", "INSERT INTO people_info (id, platform_name, platform_url, people_username, people_address, people_participation_score, people_dao_num, people_proposals_created, people_voted_num, people_organization_name, people_organization_votes, people_organization_voted_win, people_organization_proposals, people_organization_proposals_win, snapshot_time, snapshot_date) VALUES "),
    )
    for txt_path, insert_prefix in import_jobs:
        txt_db(txt_path, insert_prefix)

0 comments on commit d70ebbb

Please sign in to comment.