import requests
from bs4 import BeautifulSoup

# Download a serialized novel chapter by chapter: fetch the current page,
# append its title and body to a local text file, then follow the page's
# "next chapter" link until that link points back at the book index.
url = "https://www.biqiuge8.com/book/24276/15316323.html"

# Browser-like User-Agent sent with every request (constant, so built once).
kv = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER'}

while True:
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        html = r.text
    except requests.RequestException:
        # Stop here. Falling through would either hit an undefined `html`
        # (first page) or re-parse the previous page, append that chapter
        # again and retry the same failing URL forever.
        print("当时就没打开这页"+url)
        break

    soup = BeautifulSoup(html, "html.parser")
    title = soup.select('.content > h1:nth-child(1)')[0].text
    title1 = "\r\n\r\n\r\n\r\n\r\n "+title+"\r\n\r\n\r\n\r\n "

    # Keep only the text before the first "https" (presumably a trailing
    # site link -- confirm against the page), then re-join the
    # whitespace-separated pieces as blank-line-separated paragraphs.
    text = soup.select('#content')[0].text.split("https")[0]
    text = '\r\n\r\n '.join(text.split())

    with open("元尊.txt", 'a', encoding='utf-8') as f:
        f.write(title1)
        f.write(text)
    print(title+"下载完成")

    # The third item of the chapter navigation list holds the "next" link.
    href = soup.select('.page_chapter > ul:nth-child(1) > li:nth-child(3) > a:nth-child(1)')[0]['href']
    url = 'https://www.biqiuge8.com' + href
    if href == '/book/24276/':
        # The next link leads back to the book index: no more chapters.
        print("下载完成")
        break
# -*- coding:utf-8 -*-
import tkinter
import sys
import os
import webbrowser

import requests
from lxml import etree


# Resource lookup
def resource_path(relative_path):
    """Return the absolute path of a resource file.

    When ``sys._MEIPASS`` exists (PyInstaller creates a temporary folder and
    stores its path there) the resource is resolved against that folder;
    otherwise against the current working directory.
    """
    if hasattr(sys, '_MEIPASS'):
        base_path = sys._MEIPASS
    else:
        base_path = os.path.abspath('.')
    return os.path.join(base_path, relative_path)


logo = resource_path('1.ico')


def getHTMLText(url):
    """Fetch *url* and return the decoded response body.

    Returns ``None`` (after printing a failure message) when the request
    fails or the server answers with an HTTP error status.
    """
    # NOTE(review): this hard-coded cookie looks like a session captured from
    # an unrelated site and is sent to whatever URL is requested -- confirm
    # whether it is needed at all, and keep credentials out of source control.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
        "cookie": "hng=CN%7Czh-CN%7CCNY%7C156; thw=cn; enc=sjA56zcJ9u%2F8HHfzWeGenkosE0sTJKOlrlE3PMUsXa4T4ucIGh7utUvQoTVW1UqNtT0z0zaGdAm6xBhWgTSwhQ%3D%3D; miid=202266272065909521; tracknick=zhongkangtb; _cc_=U%2BGCWk%2F7og%3D%3D; tg=0; cna=Q8BkEwdNjwACASrHh4ywLeIq; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; t=495b1ae6d656f25318235b91cf412b1a; cookie2=1e352b66734f3c1b206751d815d354ed; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; _tb_token_=e53945567abe7; mt=ci%3D-1_1; JSESSIONID=694C8BC65EFD5DCBCB5BD0FFFF0A8D54; isg=BOXl0kSA1LkPtzG_hnKhESi05qHfioaHPErXpufJqJwr_gRwr3FyhCfciCItfrFs; l=cBIvTKdVvmfdhDbzBOfZVuI81hQtoQ908sPzw4swkICP9wCH50UfWZeIGwLMCnGVK6SDR3Sq8BObB0LNuyCqJxpsw3k_J_f..",
    }
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        print("爬取失败")
        return None


# Example page: 'http://poedb.tw/tw/unique.php?n=Hands_of_the_High_Templar'
def _extract_table(url, table_index):
    """Return ``[[col1, col2], ...]`` scraped from one table of the page.

    Collects the text nodes of the first and second ``<td>`` of every row in
    ``/html/body/div[5]/div/table[table_index]`` and pairs them by position.
    Returns an empty list when the page could not be fetched or parsed.
    """
    page = getHTMLText(url)
    if not page:
        return []
    tree = etree.HTML(page)
    if tree is None:
        return []
    # NOTE(review): the path requires a literal <tbody>; confirm the served
    # HTML really contains one (browsers insert it, raw markup may not).
    cells = f"/html/body/div[5]/div/table[{table_index}]/tbody//tr//td"
    first_column = tree.xpath(cells + "[1]/text()")
    second_column = tree.xpath(cells + "[2]/text()")
    # zip() stops at the shorter column instead of raising IndexError when
    # the two columns yield a different number of text nodes.
    return [list(pair) for pair in zip(first_column, second_column)]


def 标准联盟(url):
    """Return [price, quantity] pairs from the page's second table."""
    return _extract_table(url, 2)


def 赛季联盟(url):
    """Return [price, quantity] pairs from the page's third table."""
    return _extract_table(url, 3)


# Mapping from the Chinese item name to the English name used in the URL.
list1 = [{'ch_name': '北方雷霆聖杖', 'en_name': 'Agnerod North'},]


def 获得url():
    """Resolve the item named on the clipboard's second line to a page URL.

    Returns ``[name, url]`` when the name is found in ``list1``. Otherwise an
    explanatory message is written to the ``text1`` widget and ``None`` is
    returned.
    """
    r = tkinter.Tk()
    r.withdraw()
    try:
        st = r.clipboard_get().split('\n')[1]
    except (tkinter.TclError, IndexError):
        # Clipboard empty / not text, or it has fewer than two lines.
        text1.delete(0.0, tkinter.END)
        text1.insert(tkinter.INSERT, "你复制这个东西似乎有点问题啊")
        return None
    finally:
        # The hidden helper root is only needed for the clipboard read;
        # without this every call would leave another Tk root behind.
        r.destroy()

    for i in list1:
        if st == i["ch_name"]:
            return [st, f'http://poedb.tw/tw/unique.php?n={i["en_name"]}']

    text1.delete(0.0, tkinter.END)
    text1.insert(tkinter.INSERT, "我居然没有找到这件传奇")
    return None


# Sample item names for manual testing: 西里的戰衣, 聖宗神手


def 显示价格():
    """Show both scraped price tables for the item on the clipboard."""
    res = 获得url()
    if res is None:
        # 获得url() has already written the reason into text1.
        return
    name, url = res
    text1.delete(0.0, tkinter.END)
    text1.insert(tkinter.INSERT, name +'\n')
    text1.insert(tkinter.INSERT, '\r\n标准联盟'+'\n')
    for i in 标准联盟(url):
        text1.insert(tkinter.INSERT, "%-15s %-5s \n"%(i[0],i[1]))
    text1.insert(tkinter.INSERT, '\r\n赛季联盟'+'\n')
    for j in 赛季联盟(url):
        text1.insert(tkinter.INSERT, "%-15s %-5s \n"%(j[0],j[1]))


def 打开网页():
    """Open the page of the item on the clipboard in the web browser."""
    res = 获得url()
    if res is None:
        # Leave the error message from 获得url() visible.
        return
    text1.delete(0.0, tkinter.END)
    text1.insert(tkinter.INSERT, "OK正在打开网页")
    webbrowser.open(res[1])


# Build the window: a button per action with the output text box between.
win = tkinter.Tk()
win.iconbitmap(logo)
win.title("挖挖")
win.geometry('180x400+900+300')
win.resizable(0, 0)
tkinter.Button(win, text="显示价格", command=显示价格).pack(fill='both')
text1 = tkinter.Text(win, width=59, height=26)
text1.pack()
tkinter.Button(win, text="打开网页链接", command=打开网页).pack(fill='both')
win.mainloop()
import json
import requests


def 爬虫获取post数据():
    """POST a JSON payload and return the ``khmc`` value of each record.

    The URL, payload and several headers are blank placeholders that must be
    filled in before the function can be used.

    Returns:
        A list with one string per entry of the response's ``obj`` list:
        that entry's ``khmc`` value followed by ``'--'``.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with a 4xx/5xx status.
        ValueError, SyntaxError: the response body is neither JSON nor a
            Python literal.
        KeyError: the decoded response lacks ``obj`` or an entry lacks
            ``khmc``.
    """
    import ast  # only needed for the literal fallback below

    postUrl = ''
    # Request body, serialized to JSON below.
    payloadData = {
        '': ""
    }
    # Request headers. No hard-coded Content-Length: requests derives it
    # from the actual body, and a fixed value goes stale with the payload.
    # NOTE(review): the bearer token is a credential committed to source --
    # load it from configuration or the environment instead.
    payloadHeader = {
        'Host': '',
        'Origin': '',
        'Referer': '',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        'Content-Type': 'application/json',
        'Connection': 'keep-alive',
        'Authorization': 'bearer eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE1NDE1OTQ4NjAsInN1YiI6ImJ3eWRiZyJ9.t_XDqE2CaCSxg_Mtw5BrgUCTFpjpmU9oyI32oGPO9wY',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': 'application/json, text/plain, */*',
    }
    # Request timeout in seconds.
    timeOut = 25

    # Passing json=payloadData to requests.post would work as well.
    dumpJsonData = json.dumps(payloadData)
    res = requests.post(postUrl, data=dumpJsonData, headers=payloadHeader,
                        timeout=timeOut, allow_redirects=False)
    res.raise_for_status()

    # SECURITY: the original used eval(res.text), which executes whatever
    # the server sends and also fails on JSON true/false/null. Decode as
    # JSON; fall back to a literal-only parse for Python-style bodies.
    try:
        res_dict = json.loads(res.text)
    except ValueError:
        res_dict = ast.literal_eval(res.text)

    return [i['khmc'] + '--' for i in res_dict['obj']]
布丁
易将木剑交给了师父,两人盘腿坐在大树下,师父从怀里摸出一块铁片,在木剑上刻着字。
“无什么……什么什么……易大什么?”易抓着脑袋,念着师父刻的字。
“这些字读作「无极剑派第三百六十九位传人,易大师」。叫你好好读书,连这些字都不认识。”师父敲着易的脑袋说。
“明明是你字太烂……弯弯曲曲的像蚯蚓一样。”易撅着嘴,摸着脑袋,委屈地说。
“是吗,哈哈……”师父摸着脑袋,尴尬地笑着。
“师父,为什么要刻个易大师呢?易就可以了啊。”
“因为带有大师的人,都是很厉害的人。为师希望你以后能将无极剑派的思想发扬光大,所以才刻上大师两个字,这是为师对你的期望。”
“徒儿绝不会辜负师父的期望!”