import requests
import re
import os
# Matches each thumbnail <div> on the listing page and captures the <img> src.
# re.S lets "." cross newlines, because the markup spans several lines.
IMAGE_SRC_PATTERN = re.compile(
    r'<div class="thumb">.*?<img src="(.*?)" alt.*?</div>', re.S
)


def extract_image_urls(page_html):
    """Return the image URLs found in the thumbnail blocks of *page_html*.

    Each captured ``src`` is prefixed with ``"https:"``, as the original
    script did (this assumes the page uses protocol-relative ``//host/...``
    sources -- TODO confirm against the live markup).

    Args:
        page_html: HTML text of the listing page.

    Returns:
        A list of URL strings in document order; empty if nothing matches.
    """
    return ["https:" + src for src in IMAGE_SRC_PATTERN.findall(page_html)]


if __name__ == "__main__":
    save_dir = "F:/tupian"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_dir, exist_ok=True)

    url = "https://www.qiushibaike.com/imgrank/#"
    head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"}
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=head, timeout=10).text

    for src in extract_image_urls(response):
        # Fetch the image itself (src), not the listing page (url).
        image_data = requests.get(url=src, headers=head, timeout=10).content
        # The last path segment of the image URL is used as the file name.
        image_name = src.split(sep="/")[-1]
        image_path = save_dir + "/" + image_name
        with open(image_path, "wb") as fp:
            fp.write(image_data)
        print(image_name, "下载成功")
# 第一步:你获取图片 list 的正则表达式有问题,不过建议用 xpath
import requests
import os
from lxml.html import etree
if __name__ == '__main__':
    # Fetch the listing page and print the thumbnail image sources found
    # via XPath; nothing is downloaded in this version.
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("D:/tupian", exist_ok=True)

    url = "https://www.qiushibaike.com/imgrank/#"
    head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"}
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=head, timeout=10).text

    # Parse the HTML and select the src attribute of every <img> that sits
    # inside an <a> directly under an element with class="thumb".
    res_demo = etree.HTML(response)
    img_list = res_demo.xpath('//*[@class="thumb"]/a/img/@src')
    print(img_list)
这个正则表达式有问题。
教你一种方法:
一般把你要用正则匹配的内容拿一个示例出来,
把想要得到的结果用正则替换,其余的不要动、原样匹配,一般正则就没问题了;出问题的地方一般就是空格没注意到。