# -*- coding: utf-8 -*-
# @Time: 2022/4/8 0:02
# @Software: PyCharm
# @File: wallhavenBiZhi.py
"""Download full-size wallpapers listed on wallhaven.cc pages."""
import os
import random
import time

import requests
from lxml import etree


class BZ():
    """Scrape wallhaven listing pages and save each full-size image to disk.

    Class attributes hold the request settings so they can be overridden on
    a subclass or an instance without touching the crawl logic.
    """

    # Directory the images are written to (the path the script always used).
    SAVE_DIR = 'C:/Users/19873/Pictures/bizhi/'
    # Full-size image URL: first placeholder is the two-character folder,
    # second is the image file name.
    FULL_URL = 'https://w.wallhaven.cc/full/{}/wallhaven-{}'
    HEADERS = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
        ),
    }
    # Seconds allowed for every HTTP request.
    TIMEOUT = 5.0

    def __init__(self, save_dir=None):
        """Create a scraper.

        Args:
            save_dir: Optional directory for downloaded images; defaults to
                ``SAVE_DIR``.
        """
        self.save_dir = self.SAVE_DIR if save_dir is None else save_dir

    def tree(self, e):
        """Parse the HTML text *e* and return the lxml element tree."""
        return etree.HTML(e)

    def getBZ(self, url, pages=9):
        """Walk the listing pages and download every wallpaper found.

        Args:
            url: Listing URL template containing one ``{}`` placeholder for
                the page number.
            pages: Number of pages to visit, starting at page 1. The default
                of 9 matches the original ``range(1, 10)`` loop (whose
                comment claimed ten pages).

        A failed request is reported and skipped so one timeout does not
        abort the whole crawl.
        """
        for page in range(1, pages + 1):
            # To sample a random page instead: page = random.randint(1, 50)
            print(time.strftime("%H:%M:%S"))
            print("第{}页".format(page))

            # First-level request: the listing page itself.
            try:
                listing = requests.get(
                    url=url.format(page),
                    headers=self.HEADERS,
                    timeout=self.TIMEOUT,
                )
                listing.raise_for_status()
            except requests.RequestException as exc:
                print('第{}页请求失败: {}'.format(page, exc))
                continue

            data = self.tree(listing.text)
            if data is None:
                # Nothing parseable came back; move on to the next page.
                continue

            for li in data.xpath('.//div[@id="thumbs"]//li'):
                thumbs = li.xpath('.//img[@class="lazyload"]/@data-src')
                if not thumbs:
                    continue
                fetched = self._fetch_full_image(thumbs[0])
                if fetched is None:
                    continue
                file_name, image_url, content = fetched
                self.save_picture(file_name, content)
                print(image_url)

    def _fetch_full_image(self, thumb_url):
        """Download the full-size image that belongs to a thumbnail URL.

        Returns:
            ``(file_name, image_url, content)`` on success, otherwise
            ``None`` (the failure is printed).
        """
        # Assumes the thumbnail URL ends in a 10-character file name whose
        # first two characters are also the folder name - TODO confirm
        # against the site's current URL scheme.
        name = thumb_url[-10:]
        folder = thumb_url[-10:-8]

        # Try the thumbnail's own file name first, then the same name with a
        # 'png' extension when the first one does not exist (404).
        for file_name in (name, name[:-3] + 'png'):
            image_url = self.FULL_URL.format(folder, file_name)
            try:
                resp = requests.get(
                    url=image_url,
                    headers=self.HEADERS,
                    timeout=self.TIMEOUT,
                )
            except requests.RequestException as exc:
                print('下载失败: {} ({})'.format(image_url, exc))
                return None
            if resp.status_code != 404:
                break

        # Never write an error page to disk as if it were an image.
        if not resp.ok:
            print('下载失败: {} ({})'.format(image_url, resp.status_code))
            return None
        return file_name, image_url, resp.content

    def save_picture(self, img_end, content_picture):
        """Write the image bytes to ``save_dir`` under the name *img_end*.

        Args:
            img_end: File name (with extension) for the saved image.
            content_picture: Raw image bytes.
        """
        # Instances created without running __init__ fall back to SAVE_DIR.
        save_dir = getattr(self, 'save_dir', self.SAVE_DIR)
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, img_end), 'wb') as file:
            file.write(content_picture)
        print('保存完成' + time.strftime("%H:%M:%S"))


if __name__ == '__main__':
    url = 'https://wallhaven.cc/hot?page={}'
    bz = BZ()
    bz.getBZ(url)
python教程
Python爬取wallhaven壁纸脚本
python教程
51源码
2022-11-01
共人阅读
下一篇: 返回列表
热门推荐
-
01利用Python开发个简单版的磁力搜索工具源码 0
-
02批量转存百度网盘分享资源Python脚本 0
-
03Python爬取豆瓣电影top250排行榜 0
-
04微信小程序羊了个羊python全自动刷榜脚本方法 0
-
05羊了个羊刷通关次数python版本 0