邮件 爬虫 tkinter win32 selenium django os excel re list pandas

import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
import os,datetime
# Mail a copy of the Django SQLite database found in the current directory.
# The three values below are placeholders and must be filled in before use.
_user = "---"
_pwd  = "---"
_to   = "---"
today = datetime.date.today().strftime('%y%m%d')

msg = MIMEMultipart()
msg["Subject"] = today+'数据库备份'
msg["From"]    = _user
msg["To"]      = _to
msg.attach(MIMEText('这是'+ today + '份的数据文件'))

# --- attachment: the first directory entry whose name contains "db.sqlite3" ---
for name in os.listdir():
    if "db.sqlite3" in name:
        # Read through a context manager so the file handle is always released.
        with open(name, 'rb') as db_file:
            part = MIMEApplication(db_file.read())
        part.add_header('Content-Disposition', 'attachment', filename=today+'.sqlite3')
        msg.attach(part)
        break

s = smtplib.SMTP("smtp.qq.com", timeout=60)
try:
    s.login(_user, _pwd)
    s.sendmail(_user, _to, msg.as_string())
    print("发送成功")
except Exception as exc:
    # Script-level boundary: report the failure (with its cause) instead of
    # crashing, but no longer swallow KeyboardInterrupt/SystemExit.
    print("发送失败", exc)
finally:
    # Close the connection on both the success and the failure path.
    s.close()


# 同文件夹下发送邮件.sh内容
'''
#!/bin/bash
cd /home/sites/173.82.120.157/MasterYi_Django_blog&&python3 发送邮件.py


crontab -e
19 1 * * * sh /home/sites/173.82.120.157/MasterYi_Django_blog/发送邮件.sh
'''


import requests
from bs4 import BeautifulSoup

# Download a novel chapter by chapter, following the "next chapter" link and
# appending every chapter to 元尊.txt until the link points back to the index.
url = "https://www.biqiuge8.com/book/24276/15316323.html"
kv = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER'}
_max_failures = 3   # consecutive failed requests tolerated before giving up
_failures = 0
while True:
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        html = r.text
    except requests.RequestException:
        print("当时就没打开这页"+url)
        # Without this guard the loop would go on with an undefined or stale
        # `html` (NameError, or the previous chapter written again forever).
        _failures += 1
        if _failures >= _max_failures:
            break
        continue
    _failures = 0

    soup = BeautifulSoup(html, "html.parser")
    title = soup.select('.content > h1:nth-child(1)')[0].text
    title1 = "\r\n\r\n\r\n\r\n\r\n    "+title+"\r\n\r\n\r\n\r\n    "
    # Keep only the text before the first "https" and re-flow it so that each
    # whitespace-separated fragment becomes its own indented paragraph.
    text = soup.select('#content')[0].text.split("https")[0]
    text = '\r\n\r\n    '.join(text.split())
    with open("元尊.txt", 'a', encoding='utf-8') as f:
        f.write(title1)
        f.write(text)
    print(title+"下载完成")

    href = soup.select('.page_chapter > ul:nth-child(1) > li:nth-child(3) > a:nth-child(1)')[0]['href']
    url = 'https://www.biqiuge8.com' + href
    # The last chapter's "next" link is the book index page.
    if href == '/book/24276/':
        print("下载完成")
        break


import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication


def 发送邮件(邮件标题,称呼,邮件文字,发件人邮箱,密码,收件人邮箱,附件路径列表,附件命名列表):
    """Send a plain-text e-mail with attachments through smtp.qq.com.

    Args:
        邮件标题: Subject line.
        称呼: Value written to the "To" header (the display text).
        邮件文字: Plain-text body.
        发件人邮箱: Sender address, also used as the SMTP login name.
        密码: SMTP password for the sender account.
        收件人邮箱: Envelope recipient(s) handed to ``sendmail``; may be a list.
        附件路径列表: Paths of the files to attach.
        附件命名列表: File names shown for the attachments, paired with the
            paths by position (extra entries on either side are ignored).

    Prints "发送成功" on success and "发送失败" plus the error otherwise.
    """
    msg = MIMEMultipart()
    msg["Subject"] = 邮件标题
    msg["From"]    = 发件人邮箱
    # Only the header shows 称呼; the real recipients are given to sendmail().
    msg["To"]      = 称呼
    # Text part.
    msg.attach(MIMEText(邮件文字))
    # Attachment parts.
    for 附件路径,附件命名 in zip(附件路径列表,附件命名列表):
        # Context manager so each attachment's file handle is released.
        with open(附件路径,'rb') as 附件文件:
            part = MIMEApplication(附件文件.read())
        part.add_header('Content-Disposition', 'attachment', filename=附件命名)
        msg.attach(part)

    s = smtplib.SMTP("smtp.qq.com", timeout=60)  # SMTP server address
    try:
        s.login(发件人邮箱, 密码)
        s.sendmail(发件人邮箱, 收件人邮箱, msg.as_string())
        print("发送成功")
    except Exception as exc:
        # Best-effort send: report the failure and its cause, but let
        # KeyboardInterrupt/SystemExit propagate.
        print("发送失败", exc)
    finally:
        # Close the connection whether or not the send succeeded.
        s.close()


# -*- coding:utf-8 -*-
import tkinter,sys,os
import webbrowser
import requests
from lxml import etree
#资料引入
def resource_path(relative_path):
    """Return the absolute path of a bundled resource.

    When the program runs from a PyInstaller bundle, ``sys._MEIPASS`` holds
    the temporary extraction directory and is used as the base; otherwise
    the current working directory is used.
    """
    try:
        root = sys._MEIPASS
    except AttributeError:
        # Not running from a PyInstaller bundle.
        root = os.path.abspath('.')
    return os.path.join(root, relative_path)
# Path of the icon file '1.ico', resolved through resource_path().
logo = resource_path('1.ico')
def getHTMLText(url):
    """Fetch ``url`` and return the decoded response body.

    The request carries a fixed browser User-Agent and a hard-coded cookie
    header, with a 30-second timeout.

    Returns:
        The page text, or ``None`` when the request fails or the server
        answers with an error status ("爬取失败" is printed in that case).
    """
    # NOTE(review): the cookie below is a captured session value kept in the
    # source; it has probably expired and should not live in version control.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
        "cookie":"hng=CN%7Czh-CN%7CCNY%7C156; thw=cn; enc=sjA56zcJ9u%2F8HHfzWeGenkosE0sTJKOlrlE3PMUsXa4T4ucIGh7utUvQoTVW1UqNtT0z0zaGdAm6xBhWgTSwhQ%3D%3D; miid=202266272065909521; tracknick=zhongkangtb; _cc_=U%2BGCWk%2F7og%3D%3D; tg=0; cna=Q8BkEwdNjwACASrHh4ywLeIq; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; t=495b1ae6d656f25318235b91cf412b1a; cookie2=1e352b66734f3c1b206751d815d354ed; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; _tb_token_=e53945567abe7; mt=ci%3D-1_1; JSESSIONID=694C8BC65EFD5DCBCB5BD0FFFF0A8D54; isg=BOXl0kSA1LkPtzG_hnKhESi05qHfioaHPErXpufJqJwr_gRwr3FyhCfciCItfrFs; l=cBIvTKdVvmfdhDbzBOfZVuI81hQtoQ908sPzw4swkICP9wCH50UfWZeIGwLMCnGVK6SDR3Sq8BObB0LNuyCqJxpsw3k_J_f.."
    }
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP errors are treated as "fetch failed"; programming
        # errors are no longer hidden by a bare except.
        print("爬取失败")
        return None
    r.encoding = r.apparent_encoding
    return r.text
    # Example URL: 'http://poedb.tw/tw/unique.php?n=Hands_of_the_High_Templar'
def _提取价格数量表(url, 表序号):
    """Return ``[price, quantity]`` pairs from the ``表序号``-th table of the page.

    Prices are the text of the first cell of each row and quantities the text
    of the second cell. An empty list is returned when the page could not be
    fetched or parsed.
    """
    页面 = getHTMLText(url)
    if not 页面:
        # getHTMLText() returns None on failure; etree.HTML(None) would raise.
        return []
    html = etree.HTML(页面)
    if html is None:
        return []
    单元格 = f"/html/body/div[5]/div/table[{表序号}]/tbody//tr//td"
    价格 = html.xpath(f"{单元格}[1]/text()")
    数量 = html.xpath(f"{单元格}[2]/text()")
    # zip() pairs the columns by position and stops at the shorter one, so a
    # row with a missing cell no longer raises IndexError.
    return [[单价, 件数] for 单价, 件数 in zip(价格, 数量)]
def 标准联盟(url):
    """Return the ``[price, quantity]`` rows of the second table on the page."""
    return _提取价格数量表(url, 2)
def 赛季联盟(url):
    """Return the ``[price, quantity]`` rows of the third table on the page."""
    return _提取价格数量表(url, 3)
# Mapping table: each entry pairs an item's Chinese name ('ch_name') with the
# English name ('en_name') that is inserted into the lookup URL.
list1 = [{'ch_name': '北方雷霆聖杖', 'en_name': 'Agnerod North'},]
def 获得url():
    """Look up the item named on the second clipboard line.

    The clipboard text is split on newlines and its second line is compared
    with the ``ch_name`` of every entry in ``list1``.

    Returns:
        ``[name, url]`` for the first matching entry, or ``None`` when the
        clipboard cannot be read or no entry matches. In both failure cases
        a message is written into ``text1``.
    """
    r = tkinter.Tk()
    r.withdraw()
    try:
        st = r.clipboard_get().split('\n')[1]
    except (tkinter.TclError, IndexError):
        # Clipboard empty / not text, or fewer than two lines were copied.
        text1.delete(0.0, tkinter.END)
        text1.insert(tkinter.INSERT, "你复制这个东西似乎有点问题啊")
        return None
    finally:
        # The hidden helper root is only needed for the clipboard read;
        # destroy it so every call does not leave another Tk instance behind.
        r.destroy()

    for i in list1:
        if st == i["ch_name"]:
            return [st, f'http://poedb.tw/tw/unique.php?n={i["en_name"]}']

    text1.delete(0.0, tkinter.END)
    text1.insert(tkinter.INSERT, "我居然没有找到这件传奇")
    return None
'''

西里的戰衣
聖宗神手
'''
def 显示价格():
    """Show the price tables of the item currently on the clipboard in ``text1``.

    Does nothing further when ``获得url()`` finds no item; that function has
    already written its own message into ``text1``.
    """
    # Resolve the item once: every 获得url() call re-reads the clipboard, and
    # its None result must not be indexed.
    结果 = 获得url()
    if 结果 is None:
        return
    name, url = 结果
    text1.delete(0.0, tkinter.END)
    text1.insert(tkinter.INSERT, name +'\n')
    text1.insert(tkinter.INSERT, '\r\n标准联盟'+'\n')
    for i in 标准联盟(url):
        text1.insert(tkinter.INSERT, "%-15s %-5s \n"%(i[0],i[1]))
    text1.insert(tkinter.INSERT, '\r\n赛季联盟'+'\n')
    for j in 赛季联盟(url):
        text1.insert(tkinter.INSERT, "%-15s %-5s \n"%(j[0],j[1]))

def 打开网页():
    """Open the lookup page of the item currently on the clipboard in a browser.

    Does nothing further when ``获得url()`` finds no item; that function has
    already written its own message into ``text1``.
    """
    # Resolve first: writing the "opening" notice before a failed lookup would
    # be overwritten by the error text and then crash on indexing None.
    结果 = 获得url()
    if 结果 is None:
        return
    text1.delete(0.0, tkinter.END)
    text1.insert(tkinter.INSERT, "OK正在打开网页")
    webbrowser.open(结果[1])

# Build the main window: a button on top, the result text box in the middle
# and a second button below it, then hand control to the Tk event loop.
win = tkinter.Tk()
win.iconbitmap(logo)
win.title("挖挖")
# 180x400 pixels, placed at screen offset (900, 300).
win.geometry('180x400+900+300')
# Disable resizing in both directions.
win.resizable(0, 0)
tkinter.Button(win,text = "显示价格",command = 显示价格).pack(fill='both')
# text1 is the module-level widget the callbacks above write their output to.
text1 = tkinter.Text(win,width=59,height=26)
text1.pack()
tkinter.Button(win,text = "打开网页链接",command = 打开网页).pack(fill='both')
# Blocks until the window is closed.
win.mainloop()


import win32gui,win32con,win32api

# 操作win32相关
def 点击(id):
    """Simulate a left mouse click on the window with handle ``id``.

    WM_LBUTTONDOWN is delivered with SendMessage, then WM_LBUTTONUP is queued
    with PostMessage; both use 0 for wParam and lParam.
    """
    steps = (
        (win32gui.SendMessage, win32con.WM_LBUTTONDOWN),
        (win32gui.PostMessage, win32con.WM_LBUTTONUP),
    )
    for deliver, message in steps:
        deliver(id, message, 0, 0)
def 填信息(id,text):
    """Set the text of the window with handle ``id`` by sending WM_SETTEXT."""
    message = win32con.WM_SETTEXT
    win32gui.SendMessage(id, message, 0, text)
def 发送回车(id):
    """Simulate pressing Enter in the window with handle ``id``.

    WM_KEYDOWN is delivered with SendMessage, then WM_KEYUP is queued with
    PostMessage; both carry VK_RETURN as wParam and 0 as lParam.
    """
    key = win32con.VK_RETURN
    steps = (
        (win32gui.SendMessage, win32con.WM_KEYDOWN),
        (win32gui.PostMessage, win32con.WM_KEYUP),
    )
    for deliver, message in steps:
        deliver(id, message, key, 0)
def 列出子窗口句柄(id):
    """Collect, print and return the child-window handles of window ``id``.

    One line is printed per handle: the handle, the handle in upper-case hex,
    the window text, and its position in the returned list.
    """
    handles = []

    def collect(hwnd, bucket):
        bucket.append(hwnd)

    win32gui.EnumChildWindows(id, collect, handles)
    for position, handle in enumerate(handles):
        print(handle, "{:#016X}".format(handle), win32gui.GetWindowText(handle), position)
    return handles
def 获得窗口标题的句柄(name):
    """Return the first enumerated handle whose window text contains ``name``.

    Handles are enumerated with ``EnumChildWindows(None, ...)``. Returns
    ``None`` when no window text contains ``name``.
    """
    handles = []

    def collect(hwnd, bucket):
        bucket.append(hwnd)

    win32gui.EnumChildWindows(None, collect, handles)
    matches = (handle for handle in handles if name in win32gui.GetWindowText(handle))
    return next(matches, None)
def 提取句柄文本(句柄):
    """Read the text of the window/control with handle ``句柄`` and return it."""
    # Ask for the text length and reserve one extra slot in the buffer.
    length = win32gui.SendMessage(句柄, win32con.WM_GETTEXTLENGTH)+1
    buf = win32gui.PyMakeBuffer(length)
    # Request the text; it is written into ``buf``.
    win32api.SendMessage(句柄, win32con.WM_GETTEXT, length, buf)
    # Turn the buffer (minus its last element) back into a Python string.
    # NOTE(review): presumably the dropped element is the terminating NUL — confirm.
    address, length = win32gui.PyGetBufferAddressAndLen(buf[:-1])
    text = win32gui.PyGetString(address, length)
    return text
#print(提取句柄文本(列出子窗口句柄(获得窗口标题的句柄('来电接听'))[41]))


from tkinter import *
from tkinter import ttk

def 连续点击tkinter():
    """Show a small always-on-top window for stepping through a list.

    Three radio buttons choose which of three lists fills the read-only
    combobox; each click on the button advances the combobox to the next
    entry and stops at the last one. Blocks until the window is closed.
    """
    表1 = [1,2,3]
    表2 = [4,5,6]
    表3 = [7,8,9]
    # The variable name was missing in the original (a syntax error).
    全部表 = [表1,表2,表3]

    def 选表():
        # Radio values are 1-based; load the matching list, not the bare index.
        players["values"] = 全部表[var.get()-1]
        players.current(0)  # start from the first entry
    def 执行查找(*args):
        # Step to the next entry; stay on the last one instead of raising
        # IndexError when the end of the list is reached.
        下一个 = players.current() + 1
        if 下一个 < len(players["values"]):
            players.current(下一个)

    root = Tk()
    var = IntVar()
    var.set(1)
    name = StringVar()
    root.wm_attributes('-topmost',1)

    for 序号 in (1, 2, 3):
        Radiobutton(root, text=f"表{序号}", value=序号, variable=var, command=选表).pack()

    players = ttk.Combobox(root, textvariable=name,width=50)
    players["values"] = 表1
    players["state"] = "readonly"
    players.current(0)
    players.pack()
    Button(root,text = "点击查询",command = 执行查找,width=50,height=20).pack()

    root.mainloop()


import json
import requests
def 爬虫获取post数据():
    """POST a JSON payload and return the ``khmc`` values of the reply.

    This is a template: the URL, payload and several headers are empty
    placeholders that must be filled in before use.

    Returns:
        A list holding ``item['khmc'] + '--'`` for every item under the
        ``'obj'`` key of the JSON response.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: when the response body is not valid JSON.
    """
    # Login URL would be e.g. 'user/login'.
    postUrl = ''
    # Request payload.
    payloadData = {
    '': ""
        }

    # Request headers.
    # NOTE(review): the bearer token is hard-coded in source; it should come
    # from configuration or a login step instead.
    payloadHeader = {
    'Host': '',
    'Origin': '',
    #'Cookie': '',
    'Referer': '',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    'Content-Type': 'application/json',
    'Content-Length': '32',
    'Connection': 'keep-alive',
    'Authorization': 'bearer eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE1NDE1OTQ4NjAsInN1YiI6ImJ3eWRiZyJ9.t_XDqE2CaCSxg_Mtw5BrgUCTFpjpmU9oyI32oGPO9wY',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept': 'application/json, text/plain, */*',
    }
    # Request timeout in seconds.
    timeOut = 25
    # For a login-protected endpoint, create a requests.session(), post the
    # login form with it, and use session.post() below instead.
    dumpJsonData = json.dumps(payloadData)
    res = requests.post(postUrl, data=dumpJsonData, headers=payloadHeader, timeout=timeOut, allow_redirects=False)
    # Passing json=payloadData instead of data=... works as well.

    # SECURITY: the original ran eval() on the response body, which executes
    # whatever the server sends and fails on JSON true/false/null. The body
    # is JSON, so it is parsed as JSON.
    res_dict = json.loads(res.text)
    return [i['khmc']+'--' for i in res_dict['obj']]


from selenium.webdriver.chrome.options import Options

# chrome_options = Options()
# #chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
# chrome_driver = "chromedriver.exe"
# driver = webdriver.Chrome(chrome_driver, chrome_options=chrome_options)

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
import time
# driver.find_elements_by_id("cheddar")
# driver.find_element_by_css_selector("#cheese #cheddar")
# driver.find_elements_by_class_name()
# driver.find_element_by_xpath()
# driver..find_element_by_link_text("新闻")
def 例子():
    """Minimal Selenium example: search Baidu for "selenium" and wait for a result.

    Opens Chrome, loads the Baidu home page, types the query followed by
    Enter, then waits until an ``h3>div`` element is present. The browser is
    closed when the ``with`` block ends.
    """
    with webdriver.Chrome() as driver:
        wait = WebDriverWait(driver, 10 , 0.5)  # explicit wait: until / until_not
        driver.implicitly_wait(10)  # implicit wait
        driver.get("https://www.baidu.com")  # load the page
        #driver.refresh()  # reload the page
        #driver.set_window_size(1400,800)  # set the window size
        driver.maximize_window()   # maximise the browser window
        # find_element(By.ID, ...) replaces find_element_by_id(), which was
        # removed in Selenium 4; this form works on Selenium 3 as well.
        driver.find_element(By.ID, "kw").send_keys("selenium",Keys.ENTER)
        #driver.find_element(By.ID, "su").click()
        time.sleep(10)
        first_result = wait.until(presence_of_element_located((By.CSS_SELECTOR, "h3>div")))
        #print(first_result.get_attribute("textContent"))
def 综合():
    """Attach to an already-running Chrome and define form-filling helpers.

    Template function: it connects to a Chrome instance that exposes a remote
    debugging port on 127.0.0.1:9222 and defines helpers for clicking, typing
    and choosing dropdown options. The helpers are not called here; add the
    actual automation steps at the end of the function.
    """
    # Start Chrome beforehand from a shortcut with these arguments appended:
    #   --remote-debugging-port=9222 --user-data-dir="C:\selenum\AutomationProfile"  https://live.bwjf.com/dashboard
    chrome_options = Options()
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    chrome_driver = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
    # `options=` replaces the deprecated `chrome_options=` keyword.
    # NOTE(review): recent Selenium 4 releases also drop the positional driver
    # path in favour of a Service object — confirm against the installed version.
    driver = webdriver.Chrome(chrome_driver, options=chrome_options)
    driver.implicitly_wait(10)

    def 按文本定位(文本):
        # XPath of the <span> whose text equals 文本.
        return '''//span[text()="'''+文本+'''"]'''

    # All lookups use find_element(By..., ...): the find_element_by_* helpers
    # were removed in Selenium 4, while this form also works on Selenium 3.
    def 点击按钮(按钮位置):
        time.sleep(0.2)
        driver.find_element(By.XPATH, 按钮位置).click()
    def 填写内容(按钮位置,内容):
        driver.find_element(By.XPATH, 按钮位置).send_keys(内容)
    def 选择下拉框(按钮位置,选项):
        点击按钮(按钮位置)
        time.sleep(0.1)
        点击选项 = driver.find_element(By.XPATH, 按文本定位(选项))
        print(点击选项)
        点击选项.click()
    def 选择下拉框特殊(按钮位置,选项):
        # Always picks the second dropdown item; 选项 is not used.
        点击按钮(按钮位置)
        time.sleep(0.1)
        点击选项 = driver.find_element(By.CSS_SELECTOR, 'body > div.el-select-dropdown.el-popper > div.el-scrollbar > div.el-select-dropdown__wrap.el-scrollbar__wrap > ul > li.el-select-dropdown__item:nth-child(2)')
        点击选项.click()
    def 输入后选择下拉框(按钮位置,内容和选项):
        # Type the text, then click the suggestion that carries the same text.
        填写内容(按钮位置,内容和选项)
        print(按文本定位(内容和选项))
        time.sleep(0.1)
        选项出现 = driver.find_element(By.XPATH, 按文本定位(内容和选项))
        time.sleep(0.5)
        选项出现.click()


import os,sys,django,xlrd
# 本文件在manage.py同文件夹下
def django导入库数据():
    """Bulk-insert the rows of '表.xls' into a Django model.

    Reads every row except the first from the first sheet, configures Django
    with the 'shuai.settings' module, then creates one model instance per row
    from its first four cells and saves them with a single ``bulk_create``.
    ``APP名称`` / ``模型类名`` / ``字段N`` are placeholders to replace.
    """
    工作簿 = xlrd.open_workbook(filename='表.xls')
    首表 = 工作簿.sheet_by_index(0)
    # Row 0 is skipped.
    数据行 = []
    for 行号 in range(1, 首表.nrows):
        数据行.append(首表.row_values(行号))

    # Put the directory two levels above this file on sys.path and set up Django.
    当前文件 = os.path.abspath(__file__)
    project_path = os.path.dirname(os.path.dirname(当前文件))
    sys.path.append(project_path)
    os.environ['DJANGO_SETTINGS_MODULE'] = 'shuai.settings'
    django.setup()
    # Models can only be imported once django.setup() has run.
    from APP名称.models import 模型类名

    待创建 = [
        模型类名(字段1=行[0], 字段2=行[1], 字段3=行[2], 字段4=行[3])
        for 行 in 数据行
    ]
    模型类名.objects.bulk_create(待创建)


import os,sys

def 返回文件绝对路径(当前路径文件名):
    """Return the absolute path of ``当前路径文件名``.

    The base directory is ``sys._MEIPASS`` (the temporary folder PyInstaller
    creates) when that attribute exists, otherwise the current working
    directory.
    """
    打包运行 = hasattr(sys, '_MEIPASS')
    根目录 = sys._MEIPASS if 打包运行 else os.path.abspath('.')
    return os.path.join(根目录, 当前路径文件名)

#print(返回文件绝对路径('list1.py'))


import os

def 返回当前路径第一个包含名字的文件(名字):
    """Return the first entry of the current directory whose name contains ``名字``.

    Entries with a '$' in their name are skipped. Returns ``None`` when
    nothing matches. The order is whatever ``os.listdir`` yields.
    """
    for 文件名 in os.listdir(os.getcwd()):
        if 名字 in 文件名 and '$' not in 文件名:
            # The original had an unreachable `break` after this return.
            return 文件名
    return None
#print(返回当前路径第一个包含名字的文件('lis'))


import xlrd

class 打开excel文件():
    """Thin wrapper around one sheet of an Excel workbook opened with xlrd.

    Every accessor stores its latest result in ``self.数据`` and returns it.
    """

    def __init__(self,文件名,第几个表):
        """Open ``文件名`` and select sheet number ``第几个表`` (1-based)."""
        self.文件 = xlrd.open_workbook(filename = 文件名)
        # The attribute name was missing in the original (`self. = ...`),
        # which made the whole class a syntax error.
        self.表 = self.文件.sheet_by_index(第几个表-1)

    def 获得横向资料(self):
        """Return all rows of the sheet as a list of row-value lists."""
        self.数据 = [self.表.row_values(i) for i in range(self.表.nrows)]
        return self.数据

    def 获得纵向资料(self):
        """Return all columns of the sheet as a list of column-value lists."""
        self.数据 = [self.表.col_values(i) for i in range(self.表.ncols)]
        return self.数据

    def 获得名称列数据(self,名称):
        """Return the values of the column whose first-row cell equals ``名称``.

        Values are taken from row index 2 onward, i.e. the first two rows of
        the column are skipped.

        Raises:
            ValueError: if no cell in the first row equals ``名称``.
        """
        列序号 = next(
            (i for i in range(self.表.ncols) if self.表.cell_value(0, i) == 名称),
            None,
        )
        if 列序号 is None:
            # Previously None was passed straight to col_values().
            raise ValueError(f"column {名称!r} not found in the first row")
        self.数据 = self.表.col_values(列序号, 2)
        return self.数据

# 文件 = 打开excel文件('测试.xlsx',1)
# print(文件.获得名称列数据('地址'))


import re

def 列表取第一个电话(list1):
    """Pick the first mobile number from each inner list.

    A mobile number is a string of 11 digits starting with ``1``. For an
    inner list without any such string the placeholder ``'mei'`` is used.
    The result has one entry per inner list, in order.
    """
    手机号 = re.compile(r"^1\d{10}$")
    return [
        next((候选 for 候选 in 一组 if 手机号.match(候选)), 'mei')
        for 一组 in list1
    ]
#list1 = [['123','13210000000','13210000000'],['159','15988886666'],['123']]
#print(列表取第一个电话(list1))


def 求列表元素出现次数字典(list):
    """Return a dict mapping each distinct element to how often it occurs.

    Args:
        list: Iterable of hashable elements (the parameter name is kept for
            backward compatibility although it shadows the builtin).

    Returns:
        A plain ``dict`` of ``{element: count}``; empty for empty input.
    """
    from collections import Counter

    # One pass instead of one full list.count() scan per distinct element.
    return dict(Counter(list))


import pandas as pd

class 提取类():
    """Load one sheet of an Excel file with pandas and slice data out of it."""

    def __init__(self,文件名,第几个表):
        """Read sheet ``第几个表`` of ``文件名`` into ``self.文件``."""
        self.文件 = pd.read_excel(文件名, sheet_name=第几个表)

    def 拿取数据(self,从第几行开始,*列名):
        """Return the columns ``列名`` from label ``从第几行开始 - 2`` to the end.

        The selection is label-based (``.loc``); the result is also stored
        in ``self.数据``.
        """
        起始标签 = 从第几行开始 - 2
        选中列 = list(列名)
        self.数据 = self.文件.loc[起始标签:, 选中列]
        return self.数据
# 数据文件名 = "名称.xlsx"
# def 销售表提取():
#     销售表 = 提取类(数据文件名,0)
#     销售数据 = 销售表.拿取数据(8234,'企业名称','日期','服务费发票','金额','支付方式')
#     销售数据['发票类型'] = '纸票'
#     销售数据['日期'].fillna(method="ffill",inplace=True) #空着的日期按照前一个来
#     销售数据['日期'] = 销售数据['日期'].apply(lambda x:x.strftime('%Y-%m-%d') if x == x and type(x) != type('sd') else '日期为空')
#     销售数据 = 销售数据.values.tolist()


import openpyxl
from openpyxl.styles import Alignment, Font

def openpyxl写新文件():
    """Create '结果文件.xlsx' with a titled first sheet and a second sheet.

    Sheet '表1' gets a merged, centred title across A1:F1, fixed widths for
    columns B and C, and one appended row per entry of ``填充的数据列表``
    (empty in this template). Sheet '表2' gets a single row with one cell.
    """
    填充的数据列表 = []  # rows to write into 表1; each row is a list/tuple

    结果文件 = openpyxl.Workbook()

    表1 = 结果文件.active
    表1.title = '表1'

    # Title row: merge A1:F1 and style the top-left cell.
    表1.merge_cells(start_row=1, start_column=1, end_row=1, end_column=6)
    标题 = 表1['A1']
    标题.value = '这里我就写个字'
    标题.font = Font(name = '黑体',size = 20)
    标题.alignment = Alignment(horizontal='center', vertical='center')

    表1.column_dimensions['B'].width = 33
    表1.column_dimensions['C'].width = 11

    for 一行 in 填充的数据列表:
        表1.append(一行)

    表2 = 结果文件.create_sheet('表2')
    # append() needs a row (list/tuple/dict); the original passed a bare str,
    # which openpyxl rejects with TypeError, so the file was never saved.
    表2.append(['数据'])

    结果文件.save('结果文件.xlsx')


布丁

易将木剑交给了师父,两人盘腿坐在大树下,师父从怀里摸出一块铁片,在木剑上刻着字。

“无什么……什么什么……易大什么?”易抓着脑袋,念着师父刻的字。

“这些字读,「无极剑派第三百六十九位传人,易大师」。叫你好好读书,连这些字都不认识。”师父敲着易的脑袋说。

“明明是你字太烂……弯弯曲曲的像蚯蚓一样。”易撅着嘴,摸着脑袋,委屈地说。

“是吗,哈哈……”师父摸着脑袋,尴尬地笑着。

“师父,为什么要刻个易大师呢?易就可以了啊。”

“因为带有大师的人,都是很厉害的人。为师希望你以后能将无极剑派的思想发扬光大,所以才刻上大师两个字,这是为师对你的期望。”

“徒儿绝不会辜负师父的期望!”