欢迎光临我要源码 ,本站所有资源仅供学习与参考,禁止用于商业用途或从事违法行为!

python教程

Python爬取wallhaven壁纸脚本

python教程 51源码 2022-11-01 人阅读
# -*- coding: utf-8 -*-
# @Time: 2022/4/8 0:02
# @Software: PyCharm
# @File: wallhavenBiZhi.py
import os
import random
import time

import requests
from lxml import etree
 
 
class BZ():
    """Download full-size wallpapers linked from wallhaven.cc listing pages.

    Args:
        save_dir: Directory the images are written to. When omitted, the
            class-level default ``BZ.save_dir`` is used.
    """

    # Default output directory; overridden per instance via ``__init__``.
    save_dir = 'C:/Users/19873/Pictures/bizhi/'

    # Browser-like headers sent with every HTTP request (built once, not per page).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
    }

    def __init__(self, save_dir=None):
        if save_dir is not None:
            self.save_dir = save_dir

    def tree(self, e):
        """Parse the HTML text *e* with ``etree.HTML`` and return the result."""
        return etree.HTML(e)

    @staticmethod
    def _full_image_url(thumb_url, ext=None):
        """Build the full-size image URL for a thumbnail URL.

        Args:
            thumb_url: Thumbnail URL whose last path segment is
                ``<id>.<extension>``.
            ext: Optional replacement extension (without the dot).

        Returns:
            A ``(full_url, filename)`` tuple. The first two characters of the
            file name are used as the sub-directory of the full-size URL.
        """
        filename = thumb_url.rsplit('/', 1)[-1]
        if ext is not None:
            filename = filename.rsplit('.', 1)[0] + '.' + ext
        full_url = 'https://w.wallhaven.cc/full/' + filename[:2] + '/wallhaven-' + filename
        return full_url, filename

    def _fetch_image(self, thumb_url):
        """Download the full-size image that belongs to *thumb_url*.

        The ``.jpg``-style URL derived from the thumbnail is tried first; on a
        404 the same id is retried with a ``png`` extension.

        Returns:
            ``(filename, full_url, content)`` on HTTP 200, otherwise ``None``
            so that an error body is never stored as an image.

        Raises:
            requests.RequestException: On timeouts or connection errors.
        """
        full_url, filename = self._full_image_url(thumb_url)
        response = requests.get(url=full_url, headers=self.headers, timeout=5.0)
        if response.status_code == 404:
            full_url, filename = self._full_image_url(thumb_url, ext='png')
            response = requests.get(url=full_url, headers=self.headers, timeout=5.0)
        if response.status_code != 200:
            return None
        return filename, full_url, response.content

    def getBZ(self, url, pages=10):
        """Walk the listing pages and save every wallpaper found on them.

        Args:
            url: Listing URL template containing one ``{}`` placeholder that
                receives the page number.
            pages: Number of listing pages to visit, starting at page 1.

        Raises:
            requests.RequestException: If a listing page itself cannot be
                fetched. Failures of individual images are reported and
                skipped instead of aborting the crawl.
        """
        # range(1, pages + 1) so that pages=10 really visits ten pages.
        for page in range(1, pages + 1):
            print(time.strftime("%H:%M:%S"))
            print("第{}页".format(page))

            # Fetch and parse the listing page.
            html = requests.get(url=url.format(page), headers=self.headers, timeout=5.0).text
            data = self.tree(html)

            for li in data.xpath('.//div[@id="thumbs"]//li'):
                img = li.xpath('.//img[@class="lazyload"]/@data-src')
                if not img:
                    continue
                thumb_url = img[0]

                try:
                    result = self._fetch_image(thumb_url)
                except requests.RequestException as err:
                    # One slow or broken image must not stop the whole run.
                    print('下载失败 {}: {}'.format(thumb_url, err))
                    continue
                if result is None:
                    print('下载失败 {}'.format(thumb_url))
                    continue

                filename, full_url, content_picture = result
                self.save_picture(filename, content_picture)
                print(full_url)

    def save_picture(self, img_end, content_picture):
        """Write the image bytes to ``save_dir`` under the name *img_end*.

        Args:
            img_end: File name (including extension) of the image.
            content_picture: Raw image bytes.

        The target directory is created first if it does not exist yet.
        """
        if self.save_dir:
            os.makedirs(self.save_dir, exist_ok=True)
        with open(os.path.join(self.save_dir, img_end), 'wb') as file:
            file.write(content_picture)

        print('保存完成' + time.strftime("%H:%M:%S"))
 
 
if __name__ == '__main__':
    # Listing URL template; "{}" is filled in with the page number.
    listing_template = 'https://wallhaven.cc/hot?page={}'
    BZ().getBZ(listing_template)