From 15f4872045e7a2bce569adafa3e4cc2e4c7feafd Mon Sep 17 00:00:00 2001 From: minami9 Date: Mon, 13 Dec 2021 16:41:00 +0800 Subject: [PATCH 1/3] fix: uid=1,crash --- weibo_spider/parser/info_parser.py | 9 ++++----- weibo_spider/parser/util.py | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/weibo_spider/parser/info_parser.py b/weibo_spider/parser/info_parser.py index 6928c7c2..90164554 100644 --- a/weibo_spider/parser/info_parser.py +++ b/weibo_spider/parser/info_parser.py @@ -35,9 +35,9 @@ def extract_user_info(self): if i.split(':', 1)[0] in zh_list: setattr(user, en_list[zh_list.index(i.split(':', 1)[0])], i.split(':', 1)[1].replace('\u3000', '')) - - if self.selector.xpath( - "//div[@class='tip'][2]/text()")[0] == u'学习经历': + + experienced = self.selector.xpath("//div[@class='tip'][2]/text()") + if experienced and experienced[0] == u'学习经历': user.education = self.selector.xpath( "//div[@class='c'][4]/text()")[0][1:].replace( u'\xa0', u' ') @@ -46,8 +46,7 @@ def extract_user_info(self): user.work = self.selector.xpath( "//div[@class='c'][5]/text()")[0][1:].replace( u'\xa0', u' ') - elif self.selector.xpath( - "//div[@class='tip'][2]/text()")[0] == u'工作经历': + elif experienced and experienced[0] == u'工作经历': user.work = self.selector.xpath( "//div[@class='c'][4]/text()")[0][1:].replace( u'\xa0', u' ') diff --git a/weibo_spider/parser/util.py b/weibo_spider/parser/util.py index 8e03f6f2..6ce6be70 100644 --- a/weibo_spider/parser/util.py +++ b/weibo_spider/parser/util.py @@ -106,6 +106,8 @@ def to_video_download_url(cookie, video_page_url): def string_to_int(string): """字符串转换为整数""" + if len(string) == 0: + return 0 if isinstance(string, int): return string elif string.endswith(u'万+'): From 33b324a404760181f825498225c85e8f74416b3e Mon Sep 17 00:00:00 2001 From: minami9 Date: Mon, 13 Dec 2021 16:52:22 +0800 Subject: [PATCH 2/3] add: warning when stringtoint get a empty string --- weibo_spider/parser/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/weibo_spider/parser/util.py b/weibo_spider/parser/util.py index 6ce6be70..82e0f10c 100644 --- a/weibo_spider/parser/util.py +++ b/weibo_spider/parser/util.py @@ -107,6 +107,7 @@ def to_video_download_url(cookie, video_page_url): def string_to_int(string): """字符串转换为整数""" if len(string) == 0: + print("Warning: the input string is empty!") return 0 if isinstance(string, int): return string From 0061c7cfe8b4f6f1644796e3e7471ae837cf172a Mon Sep 17 00:00:00 2001 From: minami9 Date: Tue, 14 Dec 2021 00:19:08 +0800 Subject: [PATCH 3/3] add: use logger replace print in function string_to_int --- weibo_spider/parser/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weibo_spider/parser/util.py b/weibo_spider/parser/util.py index 82e0f10c..3169f24e 100644 --- a/weibo_spider/parser/util.py +++ b/weibo_spider/parser/util.py @@ -107,7 +107,7 @@ def to_video_download_url(cookie, video_page_url): def string_to_int(string): """字符串转换为整数""" if len(string) == 0: - print("Warning: the input string is empty!") + logger.warning("string to int, the input string is empty!") return 0 if isinstance(string, int): return string