1 #-*- coding:UTF-8 -*-
2 importrequests3 from lxml importetree4 importsys5 importio6 importos7
8
# Replace stdout with a text wrapper over the same underlying byte buffer so
# that everything printed afterwards is encoded as GB18030.
# NOTE(review): presumably intended for a Chinese-locale Windows console —
# confirm before running on a terminal that expects UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')
11
class CnBlogs:
    """Interactive console reader for the cnblogs.com front-page post list.

    Author: reader
    Published at: https://www.cnblogs.com/reader/p/11487398.html
    Author page: https://www.cnblogs.com/reader
    """

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/76.0.3809.132 Safari/537.36'
            )
        }
        self.target_domain = "https://www.cnblogs.com"
        # URL of the most recently requested page; updated by set_target_url().
        self.target_url = self.target_domain
        # 1-based number of the list page currently shown.
        self.page = 1
        # Posts of the page currently shown, keyed by their 1-based row number.
        self.lists = {}

    def clearscreen(self):
        """Clear the terminal with the command that matches the host OS."""
        # "cls" only exists on Windows; other platforms use "clear".
        os.system("cls" if os.name == "nt" else "clear")

    def set_target_url(self, page):
        """Store in ``self.target_url`` the URL of list page number *page*."""
        if page == 1:
            # The first page is served from the site root.
            self.target_url = self.target_domain
        else:
            self.target_url = 'https://www.cnblogs.com/sitehome/p/' + str(page)

    def download(self, page):
        """Download the HTML of list page *page*.

        Returns the raw response body (bytes) on HTTP 200. On any other
        status, or when the request itself fails, prints "download fail"
        and returns an empty string.
        """
        self.set_target_url(page)
        try:
            # A timeout keeps the UI from hanging forever on a dead connection.
            response = requests.get(
                self.target_url, headers=self.headers, timeout=10
            )
        except requests.RequestException:
            print("download fail")
            return ""

        if response.status_code == 200:
            return response.content

        print("download fail")
        return ""

    def isascii(self, ch):
        """Return True when *ch* sorts at or below U+007F."""
        return ch <= u'\u007f'

    def formatByWidth(self, text, width):
        """Right-pad *text* with spaces up to *width* display columns.

        Every non-ASCII character is counted as two columns. Text that is
        already at least *width* columns wide is returned unchanged.
        """
        extra_columns = sum(1 for ch in text if not self.isascii(ch))
        return text + " " * (width - extra_columns - len(text))

    def parse(self, content):
        """Parse list-page HTML, print the post table and fill ``self.lists``.

        *content* is whatever download() returned. When it is empty (failed
        download) only the table frame is printed and ``self.lists`` is left
        empty.
        """
        # Forget the previous page so stale rows can never be selected.
        self.lists = {}

        items = []
        if content:
            html = etree.HTML(content)
            if html is not None:
                items = html.xpath(
                    '//div[@id="post_list"]//div[@class="post_item_body"]'
                )
            del html

        print('+', '--' * 50, '+')
        print('|', str("当前页码:"+str(self.page)).ljust(95), '|')
        print('+', '--' * 50, '+')

        k = 1
        for li in items:
            titles = li.xpath('h3/a/text()')
            links = li.xpath('h3/a/@href')
            if not titles or not links:
                # Skip malformed entries instead of raising IndexError.
                continue
            paragraphs = li.xpath('p')
            desc = paragraphs[0].xpath('string(.)') if paragraphs else ""
            title = str(titles[0])

            self.lists[k] = {
                'title': title,
                'desc': desc.strip(),
                'link': links[0],
            }

            # Row width budget: 100 columns minus the index and its separator.
            print('|', k, self.formatByWidth(title, 100 - 1 - len(str(k))), '|')
            k += 1

        print('+', '--' * 50, '+')

    def descopt(self, k):
        """Show title, link and summary of the post listed as row *k*.

        *k* may be an int or a numeric string. Non-numeric or unknown row
        numbers are ignored and the method returns without output.
        """
        try:
            k = int(k)
        except (TypeError, ValueError):
            # e.g. the user typed "D abc".
            return

        entry = self.lists.get(k)
        if entry is None:
            return

        self.clearscreen()
        print('+', '--' * 50, '+')
        print('|', self.formatByWidth(entry['title'], 100), '|')
        print('+', '--' * 50, '+')
        print('|', self.formatByWidth(entry['link'], 100), '|')
        print('+', '--' * 50, '+')

        print('|', self.formatByWidth(entry['desc'], 100), '|')

        print('+', '--' * 50, '+')
        input("输入任意键返回...\r\n")

    def readopt(self):
        """Run the reading loop: show the current page and handle commands.

        Commands (case-insensitive): N next page, B previous page, H first
        page, "D <num>" show the summary of row <num>, Q return to the caller.
        """
        while True:
            self.clearscreen()
            print("\r\n")
            html = self.download(page=self.page)
            self.parse(html)

            print("[N]:下一页,[B]:上一页,[H]:首页,[D {num}]:简述, [Q]:返回")

            cmd = input("请输入操作编号[N、B、H、D、Q]:").strip()

            if cmd in ('Q', 'q'):  # back to the main menu
                break
            elif cmd in ('N', 'n'):  # next page
                self.page += 1
            elif cmd in ('B', 'b'):  # previous page, never below page 1
                self.page = max(1, self.page - 1)
            elif cmd in ('H', 'h'):  # first page
                self.page = 1
            else:
                # split() without an argument tolerates repeated spaces.
                parts = cmd.split()
                if len(parts) != 2:
                    continue
                # Show the summary of the selected row.
                if parts[0] in ('D', 'd'):
                    self.descopt(parts[1])

    def aboutopt(self):
        """Show the author's blog address and wait for the user to return."""
        self.clearscreen()
        print("博客园地址: https://www.cnblogs.com/reader\r\n")
        input("输入任意键返回...\r\n")

    def start(self):
        """Run the main menu loop until the user chooses to quit."""
        self.clearscreen()
        while True:
            print('+', '--' * 50, '+')
            print('|', "欢迎使用博客园阅读器(reader 开发)".center(88), '|')
            print('+', '--' * 50, '+')
            print('|', "[1]:开始阅读".center(95), '|')
            print('|', "[2]:关于作者".center(95), '|')
            print('|', "[Q]:退出软件".center(95), '|')
            print('+', '--' * 50, '+')

            cmd = input("请输入操作编号[1、2、Q]:")
            if cmd == '1':
                self.readopt()
            elif cmd == '2':
                self.aboutopt()
            elif cmd in ('Q', 'q'):
                break

            # Redraw the menu on a clean screen after every command.
            self.clearscreen()

        print("已退出,欢迎使用!")
159
if __name__ == "__main__":
    # Script entry point: build the reader and hand control to its main menu.
    CnBlogs().start()