import requests
from bs4 import BeautifulSoup
def get_movie():
    """Collect movie names from the Douban Top 250 listing pages.

    Requests the ten listing pages (``start=0, 25, ..., 225``), prints the
    HTTP status code of each response, and gathers the stripped text of the
    first ``<span>`` inside the link of every ``<div class="hd">`` element.

    Returns:
        list[str]: The names gathered from all ten pages, in page order.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'Host': 'movie.douban.com',
    }
    movie_list = []
    for i in range(0, 10):
        link = 'https://movie.douban.com/top250?start=' + str(i * 25)
        r = requests.get(link, headers=headers, timeout=8)
        print(str(i+1),"内容:",r.status_code)
        soup = BeautifulSoup(r.text, "lxml")
        div_list = soup.find_all('div', class_='hd')
        for each in div_list:
            movie = each.a.span.text.strip()
            movie_list.append(movie)
    # Return only once every page has been processed. A ``return`` placed
    # inside either loop would end the function after the first item/page.
    return movie_list
# Run the scraper and print the list it returns.
print(get_movie())
原来的写法是:for each in div_list: movie = each.a.span.text.strip() movie_list.append(movie) return movie_list(return 写在了循环体里面)。改成:for each in div_list: movie = each.a.span.text.strip() movie_list.append(movie),等循环全部结束之后再 return movie_list。注意:return movie_list 要跟最外层的 for(for i in range(0,10))对齐,这样才会在所有页面都抓取完之后再返回。
谢谢大佬!(^▽^)
# 你在获取到第一个电影的时候就 return 了,函数在那里就直接结束了。
import requests
from bs4 import BeautifulSoup
def get_movie():
    """Collect movie names from the Douban Top 250 listing pages.

    For each of the ten listing pages this prints the page number and URL,
    downloads the page, and gathers the stripped text of the first ``<span>``
    inside the link of every ``<div class="hd">`` element, printing each name
    as it is found.

    Returns:
        list[str]: The names gathered from all ten pages, in page order.

    Raises:
        requests.exceptions.Timeout: If a page does not respond within the
            timeout passed to ``requests.get``.
    """
    # The headers do not change between pages, so build them once.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36'}
    movie_list = []
    for i in range(0, 10):
        print(f'{i+1}页')
        link = f'https://movie.douban.com/top250?start={i*25}'
        print(link)
        # Bound the wait so a stalled connection raises instead of blocking
        # the script indefinitely.
        response = requests.get(link, headers=headers, timeout=8)
        soup = BeautifulSoup(response.text, 'lxml')
        for each in soup.find_all('div', class_='hd'):
            movie = each.a.span.text.strip()
            print(movie)
            movie_list.append(movie)
    return movie_list
if __name__ == '__main__':
    # Only scrape when executed as a script, not when imported.
    collected = get_movie()
    print(collected)
不过我感觉 requests + BeautifulSoup 这个组合有点过时了,给你看看用新模块 requests_html 的写法:
from requests_html import HTMLSession
# Module-level HTMLSession; get_movie() issues its page requests through it.
session = HTMLSession()
def get_movie():
    """Collect the text of every ``.hd`` element on the Douban Top 250 pages.

    Walks the ten listing pages through the module-level ``session``,
    printing each page number and URL before fetching it.

    Returns:
        list[str]: The ``.text`` of each element matched by the ``.hd``
        selector, across all ten pages in page order.
    """
    titles = []
    # Offsets 0, 25, ..., 225 select the pages; page numbers start at 1.
    for page_no, offset in enumerate(range(0, 250, 25), start=1):
        print(f'{page_no}页')
        url = f'https://movie.douban.com/top250?start={offset}'
        print(url)
        page = session.get(url)
        titles.extend(node.text for node in page.html.find('.hd'))
    return titles
if __name__ == '__main__':
    # Only scrape when executed as a script, not when imported.
    collected = get_movie()
    print(collected)
十分感谢!!!!