首页 新闻 搜索 专区 学院

python CODE 3

0
[已关闭问题] 关闭于 2016-01-11 17:20

#coding=utf-8
import os,re,time
import urllib
import urllib2


def getNewDir(dir_name):
    """Return the path of *dir_name* under the current working directory.

    The result always uses forward slashes and always ends with a single
    trailing '/'. The directory itself is not created here.

    :param dir_name: name of the sub-directory (relative, no leading slash).
    :return: string of the form '<cwd>/<dir_name>/'.
    """
    # Normalise Windows back-slashes so the rest of the script can build
    # paths with plain '/' concatenation.
    curr_dir = os.getcwd().replace('\\', '/')
    # When the cwd is a filesystem root ('/' or 'C:/') it already ends with a
    # slash; strip it so the result does not start with a doubled separator.
    curr_dir = curr_dir.rstrip('/')
    return curr_dir + '/' + dir_name + '/'

def getHtml(url):
    """Fetch *url* and return the raw response body as a byte string.

    The request goes through ``urllib2.urlopen``, so it uses whatever opener
    has been installed with ``urllib2.install_opener``.

    :param url: absolute URL to download.
    :return: the complete response body (``str`` in Python 2).
    :raises urllib2.URLError: propagated unchanged if the request fails.
    """
    # Function-scope import keeps this fix self-contained.
    from contextlib import closing

    req = urllib2.Request(url)
    # Python 2 urllib2 responses are not context managers; closing() makes
    # sure the underlying socket is released even if read() raises.
    with closing(urllib2.urlopen(req)) as res:
        return res.read()

def downImg(url, filepath):
    """Download the resource at *url* and store it in the file *filepath*.

    Thin wrapper around ``urllib.urlretrieve``; nothing is returned.

    :param url: address of the resource to download.
    :param filepath: local path the downloaded data is written to.
    """
    urllib.urlretrieve(url, filename=filepath)

def url_build_proxy_opener(proxy_info):
passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
passmgr.add_password(None, proxy_info['server'] , proxy_info['user'], proxy_info['password'])
auth = urllib2.ProxyBasicAuthHandler(passmgr)
opener = urllib2.build_opener(urllib2.ProxyHandler({'http':proxy_info['server']}) , auth)
return opener

 

if __name__=="__main__":

for pageIndex in range(1,100):

#请求这个动态js更新的内容,循环修改page页"
url = "http://ing.cnblogs.com/ajax/ing/GetIngList?IngListType=all&PageIndex=" + str(pageIndex) + "&PageSize=30"

#由于权限问题,需要挂载本地代理
enable_proxy = True


#创建本地路径,用以存放下载的图片,保存图片的文件夹存放在与该脚本同级目录下
dirName = "BlogsPictures"
file_dir = getNewDir(dirName)

if(os.path.exists(file_dir) == False):
os.mkdir(file_dir)
else:
pass

#代理信息,装载代理
proxy_info = {'user':'hehe', 'password':'你y屏蔽我呀' , 'server':'openproxy.huawei.com:8080'}
opener = url_build_proxy_opener(proxy_info)

#这种方法不可用
#proxy = 'http://%s:%s@%s:%s' % ('username', 'password', 'openproxy.xxxx.com', 8080)
#proxy_handler = urllib2.ProxyHandler({"http" : proxy})
#null_proxy_handler = urllib2.ProxyHandler({})

if enable_proxy:
opener = url_build_proxy_opener(proxy_info)
else:
opener = url_build_proxy_opener()
urllib2.install_opener(opener)


#获取url的完整html,并在其中找出图片信息
gethtml = getHtml(url)
pic_patten = '<img width="36" height="36" src="(.+?)" alt=""/>'
pic_details = re.findall(pic_patten,gethtml)


#找出头像对应的用户名
user_patten = '<a href="http://home.cnblogs.com/u/(.+?)/" class="ing-author" target="_blank">(.+?)</a>'
#<a href="http://home.cnblogs.com/u/775401/" class="ing-author" target="_blank">RosonJ</a>
users = re.findall(user_patten,gethtml)


NumberOfPics = len(pic_details)


#执行下载图片函数
NumberOfUsers = len(users)
for i in range(0,NumberOfUsers):
imgurl = pic_details[i]
fileSavePath = file_dir + '/' + users[i][0] + ".png"
print "正在下载第" + str(i+1) + "个图片..."

f=open(fileSavePath,'wb')
f.write(getHtml(imgurl))
f.close()

#downImg(imgurl,fileSavePath)

print "下载完成,准备下一个"
time.sleep(1)

print "第" + str(pageIndex) + "波下载完毕,准备下载下一波..."
time.sleep(3)

west_Tang风的主页 west_Tang风 | 菜鸟二级 | 园豆:201
提问于:2016-01-11 17:20
< >
分享
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册