class spider(scrapy.Spider):
    """Scrape the Douban music category list and the entries under each category.

    ``parse`` reads the category links from the start page, yields one item per
    category and follows each category URL. ``get_info`` then yields one item
    per listed entry and follows the "next" pagination link, if any.
    """

    name = 'doubanmusic'
    start_urls = ['https://m.douban.com/music/']
    # Use the parent domain so that both the m.douban.com start page and
    # music.douban.com links are accepted by the offsite filter.
    allowed_domains = ['douban.com']

    def parse(self, response):
        """Yield a category item and a follow-up request for every category link.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            A ``DoubanmusicItem`` with ``type`` and ``type_url`` set, followed by
            a ``scrapy.Request`` for ``type_url`` that carries the item in
            ``meta['item']`` and is handled by ``get_info``.
        """
        for link in response.xpath('//ul[@class="type-list"]/li/a'):
            href = link.xpath('@href').extract_first()
            if not href:
                # Without a target URL there is nothing to record or follow.
                continue

            item = DoubanmusicItem()
            item['type'] = link.xpath('text()').extract_first(default='')
            item['type_url'] = response.urljoin(href)
            yield item
            yield scrapy.Request(
                url=item['type_url'],
                meta={'item': item},
                callback=self.get_info,
            )

    def get_info(self, response):
        """Yield one item per entry on a category page and follow pagination.

        Args:
            response: Response for a category page; ``response.meta['item']``
                must hold the category item created in ``parse``.

        Yields:
            A copy of the category item with ``song`` and ``info`` filled in for
            each entry, then a ``scrapy.Request`` for the next page when a
            "next" link is present.
        """
        category_item = response.meta['item']

        for entry in response.xpath('//div[@class="pl2"]'):
            title = entry.xpath('a/text()').extract_first()
            if title is None:
                # Entry has no title link text; skip it instead of aborting
                # the whole page with an IndexError.
                continue

            # Copy so that every yielded item is independent; mutating the
            # shared meta item would let later entries overwrite earlier ones.
            item = category_item.copy()
            item['song'] = title.strip()
            item['info'] = entry.xpath(
                'p[@class="pl"]/text()'
            ).extract_first(default='')
            yield item

        next_href = response.xpath(
            '//span[@class="next"]/a/@href'
        ).extract_first()
        if next_href:
            # Forward the category item; this callback reads it from meta and
            # would otherwise fail with KeyError on every page after the first.
            yield scrapy.Request(
                url=response.urljoin(next_href),
                meta={'item': category_item},
                callback=self.get_info,
            )