完整代码
import requests_cache as rc
from lxml import etree
import requests
from lxml import html
from html.parser import HTMLParser
# Install the requests_cache cache and create a cached session for the
# requests below.
rc.install_cache()
res = rc.CachedSession()

url = "https://www.dy2018.com"
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
}

# Fetch the home page. The encoding must be set on the *response* (not on the
# session) and before `.text` is read; otherwise it has no effect on how the
# body is decoded.
response = res.get(url, headers=head)
response.encoding = 'gb2312'
home_tree = etree.HTML(response.text)

# href values of the links located under the element with id="header".
nav_links = home_tree.xpath('//*[@id="header"]/div/div[3]/div/div/div[2]/ul/li/a/@href')

for href in nav_links:
    # NOTE(review): assumes every href is site-relative (starts with "/");
    # confirm against the live page.
    page_url = url + href
    # Send the same User-Agent as the home-page request.
    page = res.get(page_url, headers=head)
    page.encoding = 'gb2312'
    page_tree = etree.HTML(page.text)

    # xpath() returns a list, so there is no `.text()` to call on it, and
    # <br> elements carry no text of their own. Select the text nodes under
    # the id="Zoom" element directly instead.
    fragments = page_tree.xpath('//*[@id="Zoom"]//text()')
    lines = [piece.strip() for piece in fragments if piece.strip()]
    if lines:
        print("\n".join(lines))
url对应的文本吗?
# Request the page at `url` and print the response object.
# The header dict defined earlier in this file is named `head`; no `headers`
# name is defined in the visible source, so `headers=headers` would raise
# NameError.
# NOTE(review): print(req) shows the Response repr (status), not the page
# body; use req.text if the page text is what is wanted.
req = requests.get(url=url, headers=head)
print(req)