# 本帖最后由 hackerbob 于 2022-7-20 17:58 编辑
# [Python] 纯文本查看 复制代码
import requests
from lxml import etree
# Standard-library helpers needed by the crawler below.
import os
import re
from urllib.parse import urljoin

# Browser-like headers sent with every HTTP request.
HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.55"
}
# Category root; listing pages are "index.html", "list_2.html", "list_3.html", ...
BASE_URL = "http://bizhi360.com/weimei/"
# Seconds to wait for a response; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 30
# Characters rejected in Windows file names, plus ASCII control characters.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def parse_page_limit(text):
    """Convert the user's page-count answer into an integer limit.

    Args:
        text: Raw string as returned by ``input``.

    Returns:
        The number of listing pages to crawl, or ``0`` (meaning "no limit")
        when the answer is blank, is not a whole number, or is not positive.
    """
    try:
        limit = int(text.strip())
    except ValueError:
        # Blank or non-numeric answers fall back to crawling everything.
        return 0
    return max(limit, 0)


def page_name(number):
    """Return the file name of the 1-based listing page *number*."""
    return "index.html" if number == 1 else f"list_{number}.html"


def safe_filename(title):
    """Return *title* with characters that are unsafe in file names replaced.

    Illegal characters become ``_``; surrounding whitespace and trailing
    dots/spaces are removed. ``"unnamed"`` is returned when nothing is left.
    """
    cleaned = _ILLEGAL_FILENAME_CHARS.sub("_", title).strip().rstrip(". ")
    return cleaned or "unnamed"


def build_target_path(directory, title, extension):
    """Build the path an image is saved to.

    Args:
        directory: Folder to save into; an empty string means the current
            working directory.
        title: Image title taken from the listing page, used as file name.
        extension: File suffix, accepted with or without a leading dot.

    Returns:
        ``directory`` joined with ``<sanitised title>.<extension>`` using the
        platform's path separator.
    """
    file_name = safe_filename(title) + "." + extension.lstrip(".")
    return os.path.join(directory, file_name)


def main():
    """Prompt for settings, then download every image on each listing page.

    Pages are fetched in order starting at page 1. The crawl ends when the
    requested number of pages has been downloaded (the limit is inclusive),
    when a listing page does not answer with HTTP 200, or when a listing page
    yields no images.

    Raises:
        requests.RequestException: If a request fails or times out.
        OSError: If the save directory or an image file cannot be written.
    """
    extension = input("请输入保存的后缀名:")
    directory = input("请输入保存路径:")
    page_limit = parse_page_limit(input("请输入要爬到多少页(为空则默认全爬完):"))

    if directory:
        # Create the target folder up front so the first write cannot fail
        # merely because the folder does not exist yet.
        os.makedirs(directory, exist_ok=True)

    page_number = 1
    # A limit of 0 means "no limit"; the limit is checked before fetching so
    # no request is wasted on a page that will not be processed.
    while not page_limit or page_number <= page_limit:
        page_url = BASE_URL + page_name(page_number)
        resp = requests.get(
            url=page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
        )
        if resp.status_code != 200:
            # 404 marks the end of the listing; any other failure is also
            # treated as a stop signal instead of parsing an error page.
            break

        tree = etree.HTML(resp.content)
        titles = tree.xpath('//*[@id="main"]/div/div[1]/ul/li/a/@title')
        image_srcs = tree.xpath('//*[@id="main"]/div/div[1]/ul/li/a/img/@src')
        if not image_srcs:
            # Nothing to download here; stopping avoids looping forever if
            # out-of-range pages are answered with an empty 200 page.
            break

        for src, title in zip(image_srcs, titles):
            image_resp = requests.get(
                # urljoin leaves absolute URLs untouched and resolves
                # relative ones against the listing page.
                url=urljoin(page_url, src),
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            if image_resp.status_code != 200:
                # Do not save an error page under an image file name.
                continue
            target = build_target_path(directory, title, extension)
            with open(target, "wb") as image_file:
                image_file.write(image_resp.content)

        # Report the page that was just finished, then advance.
        print("已爬到第" + str(page_number) + "页")
        page_number += 1

    input("下载完成,请按任意键继续....")


if __name__ == "__main__":
    main()