def start_url(url):
    """Download the page at *url* and build year/quarter query URLs from it.

    The text of every ``<option>`` element on the page is taken as a
    candidate year label. Labels containing '季度' or '-' are skipped, and
    for each remaining label a URL is built for quarters 1 through 4.

    NOTE: each newly built URL replaces the previous one, and the single
    ``return`` runs only after both loops have finished, so the caller
    receives just the last URL that was built rather than all of them.
    """
    # Raw HTML of the page that lists the selectable years.
    page_source = requests.get(url).text
    # Captured group is the visible text of each <option> element.
    option_texts = re.findall('<option value=".*?".*?>(.*?)</option>', page_source, re.S)
    for year in option_texts:
        # Skip labels that contain '季度' or '-'.
        if '季度' in year or '-' in year:
            continue
        # Quarters 1 to 4.
        for quarter in (1, 2, 3, 4):
            latest = url + str(year) + '&season=' + str(quarter)
    return latest
输出结果:
http://quotes.****.***.com/trade/lsjysj_*******.html?year=1999&season=1
http://quotes.****.***.com/trade/lsjysj_*******.html?year=1999&season=2
http://quotes.****.***.com/trade/lsjysj_*******.html?year=1999&season=3
http://quotes.****.***.com/trade/lsjysj_*******.html?year=1999&season=4
....
http://quotes.****.***.com/trade/lsjysj_*******.html?year=1999&season=4
代码如上:在函数内部用 print 逐条打印时一切正常,循环也正常,
但是调用函数并打印返回值时,只输出最后一条。
调用:
# Base address; start_url appends the year and the quarter to it.
base_url = 'http://quotes.*****.***.com/trade/lsjysj_******.html?year='
result = start_url(base_url)
print(result)
输出结果:
http://quotes.****.***.com/trade/lsjysj_*******.html?year=1999&season=4
def start_url(url):
    """Return the quarterly query URLs for every year listed on the page at *url*.

    The text of every ``<option>`` element on the page is taken as a
    candidate year label. Labels containing '季度' or '-' are skipped. For
    each remaining label, four URLs (quarters 1 to 4) are appended to the
    result list, which is returned once both loops have finished.
    """
    # Raw HTML of the page that lists the selectable years.
    page_source = requests.get(url).text
    # Captured group is the visible text of each <option> element.
    option_texts = re.findall('<option value=".*?".*?>(.*?)</option>', page_source, re.S)
    # Accumulator: every URL is collected here instead of overwriting a
    # single variable.
    urls = list()
    for year in option_texts:
        # Skip labels that contain '季度' or '-'.
        if '季度' in year or '-' in year:
            continue
        for quarter in range(1, 5):
            urls.append(url + str(year) + '&season=' + str(quarter))
    return urls


# Base address; start_url appends the year and the quarter to it.
base_url = 'http://quotes.*****.***.com/trade/lsjysj_******.html?year='
result = start_url(base_url)
print(result)
先声明一个列表 urls = list(),在循环里用 append 把每个 URL 加进去,最后返回整个列表。
为什么我直接用 urls = [] 不行呢?它也是一个空列表呀。
def start_url(url):
    """Return the quarterly query URLs for every year listed on the page at *url*.

    The text of every ``<option>`` element on the page is taken as a
    candidate year label. Labels containing '季度' or '-' are skipped. For
    each remaining label, four URLs (quarters 1 to 4) are appended to the
    result list, which is returned once both loops have finished.
    """
    # Raw HTML of the page that lists the selectable years.
    page_source = requests.get(url).text
    # Captured group is the visible text of each <option> element.
    option_texts = re.findall('<option value=".*?".*?>(.*?)</option>', page_source, re.S)
    # Accumulator created with a list literal.
    urls = []
    for year in option_texts:
        # Skip labels that contain '季度' or '-'.
        if '季度' in year or '-' in year:
            continue
        for quarter in range(1, 5):
            urls.append(url + str(year) + '&season=' + str(quarter))
    return urls


# Base address; start_url appends the year and the quarter to it.
base_url = 'http://quotes.*****.***.com/trade/lsjysj_******.html?year='
result = start_url(base_url)
print(result)