#coding:utf-8 from fake_useragent import UserAgent; import requests import random; import re; import time import sys import os from ctypes import * import utils cookies = dict( cookies='.ASPXANONYMOUS=DhbjUsQf1AEkAAAAZmVjMmYyMTctMTlmNS00MTllLTgzMmUtZDFkNTA3OTViN2VkuvVIoIOYESDZR_leaNC6NV-w79k1; UM_distinctid=1634f8e388b28d-05125773ab69a5-d35346d-100200-1634f8e388cc8; ASP.NET_SessionId=le35jdxpgrzirkzduo43omrq; ConnectQQ=1; lllogcook=1; SojumpSurvey=0102A8A1C007E3B8D508FEA841D28E04B9D5080007710071002400035FAB8E0B95035200012F00FF62B89218EA1DEEC74901427ECECF7A926192C69D; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1526048589,1526098342,1526121057,1526223957; _cnzz_CV4478442=%E7%94%A8%E6%88%B7%E7%89%88%E6%9C%AC%7C%E5%85%8D%E8%B4%B9%E7%89%88%7C1526223958644; CNZZDATA4478442=cnzz_eid%3D2019209005-1526047631-%26ntime%3D1526647785; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b=1526648270') ua = UserAgent() curID = "23679247"; headers={ "user-agent":ua.random, #"Content-Type":"application/x-www-form-urlencoded", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Encoding": "gzip, deflate", "Accept-Language": "en-US,en;q=0.5", "Connection": "keep-alive", #"Cookie":".ASPXANONYMOUS=yMe5V6Uf1AEkAAAAZjAyNGU5NjEtM2NjMy00MzZiLWI5ZTQtODMzOTI4MmFjZTVh967A7B0luCTPXTDAtR1UnYccp_41; UM_distinctid=1634ec2ef5c1-0a0429878bc11f-4d015463-1fa400-1634ec2ef5d357; SojumpDHCX=1; CNZZDATA4478442=cnzz_eid%3D1416271409-1526031635-%26ntime%3D1526521560; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1526034788,1526349109,1526352463,1526526745; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b=1526526842" } url = "https://www.xxx.cn " #随机ip def get_proxies_random(): ip_array = ["https://113.109.162.85:808","https://114.239.89.58:61234","https://42.51.12.2:808", "https://117.64.237.43:808","https://113.86.223.234:808","https://27.40.138.220:61234" ] ip_number = random.randint(0, len(ip_array) - 1) proxies_random = { "https": ip_array[ip_number], # "https":"https://1.196.203.123:808" } return proxies_random; #固定ip proxies = get_proxies_random(); #返回地区唯一 num def get_url(url,headers,ip): result = {} try: r = requests.get(url,headers=headers,proxies=ip,cookies=cookies, timeout=5) if r.status_code == 200: r.encoding = 'utf-8' res = r.text re2 = re.compile(r"(?<=rndnum=\").+?(?=\";)") # result[0] ; result['rndnum'] = int(re2.findall(res)[0].split('.')[0]); yzm = re.compile(r"(?<=ctl00_ContentPlaceHolder1_JQ1_tdCode).+?(?=;\">)") yzm_result = yzm.findall(res)[0].split(": ") if len(yzm_result) == 2: #无验证码 #result[1] == 2; result['yzm'] = 2 pass else: #有验证码 #result[1] == 1; result['yzm'] = 1 vt_time = round(time.time() * 1000); ### 如果这里 if r.status_code == 200: 第一次成功 yyzzmm 返回的验证码是正确的,否则超过两次都是None yyzzmm = get_validate_text(ip, vt_time, 10) print(yyzzmm) print("2222222222222222222222") print(type(yyzzmm)) result['t'] = yyzzmm print(result) sys.exit() return result; return result else: result['rndnum'] = 100; return result; except requests.RequestException: result['rndnum'] = 200; return result #失败继续请求 url 码 def fail_get_url(): #time.sleep(1); print(" fail_get_url 继续...") fail_ip = get_proxies_random() print(fail_ip) rndnum = get_url(url,headers,fail_ip) #print(rndnum) if rndnum['rndnum'] > 1000: post_data(rndnum,headers,fail_ip) #print(rndnum) else: fail_get_url(); pass pass # 获取当前时间 def get_starttime(): time_now = int(time.time()) # 转换成localtime time_local = time.localtime(time_now) # 转换成新的时间格式(2016-05-09 18:59:20) dt = time.strftime("%Y-%m-%d %H:%M:%S", time_local) return dt #识别验证码 def get_ydm_number(): #print("识别验证码中..." + filename) # 验证码文件路径 # filename = b'getimage.jpg' filenames = b'image/1.jpg' result = c_char_p(b" ") captchaId = YDMApi.YDM_EasyDecodeByPath(username, password, appId, appKey, filenames, codetype, timeout, result) codes = str(result.value, encoding="utf-8"); print(codes) #这个返回值一直都是对的 return codes; # print (codes) # time.sleep(5) #print(str(result.value, encoding="utf-8")) def get_validate_text(proxies,vt_time,num): try: print("次数: "+str(num)); # headers={ # "user-agent":ua.random, # "Content-Type":"image/gif", # } print("获取验证码中....") print (proxies) vt_url = "https://www.xxx.cn/?q="+curID+"&t="+str(vt_time); pic = requests.get(vt_url, headers=headers,cookies=cookies, proxies=proxies,timeout=3) ### 如果这里第一次成功 yyzzmm 返回的验证码是正确的,否则都是None ### 但是 get_ydm_number() 返回的验证码又是正确的 if pic.status_code == 200: file_name = "image/1.jpg" # 拼接图片名 # 将图片存入本地 fp = open(file_name, 'wb') fp.write(pic.content) # 写入图片 fp.close() return get_ydm_number(); # print ("///////////////////////////") # print (yzm) # print ("///////////////////////////") # # return yzm #返回图片路径 else: print ("又开始了get_validate_text 获取验证码了") get_validate_text(proxies, vt_time, num) pass except requests.RequestException: print ("获取验证码异常继续....") get_validate_text(proxies, vt_time, num) if __name__ == '__main__': print(proxies) rndnum = get_url(url,headers,proxies) if rndnum['rndnum'] > 1000 : post_data(rndnum,headers,proxies); pass else: #time.sleep(1); fail_get_url(); #结束
好奇怪的问题 yyzzmm 的返回值 如果if r.status_code == 200:只有一次成功的话,他就是对的,否则就是None ? 为什么啊 ...
你意思是返回内容是None,然后识别图片是对的吗?
推荐你把模块理清楚,分开分析处理。
不是,只要是if r.status_code == 200: 这段不是200的话后面的就会返回None,一次成功的话就不会这样
获取验证码中....
获取验证码异常继续....
次数: 10
获取验证码中....
PFEE
None
这里 明明是识别到验证码了,返回,但是yyzzmm 的值却是None,搞不懂 ...
@弃身锋刃: 是说你第一次返回不是200,走到else之后每次都失败?你看看不是200的时候返回的是啥,打印r.status_code,r.content这些看看
@弃身锋刃: 好像明白了,你是说验证码若没有一次识别成功就会打印None,一次识别成功的时候打印的是验证码。看了一下,理论上get_validate_text的返回值一定是验证码,你打印的时候,用print(“yyzzmm” + yyzzmm)看看是不是这个地方打印的 None,或者调试看看这个None到底是谁打印的。
@Masako: 我调试了 这个yyzzmm 返回的是None 就是走else 之后都是None,不走else 返回是对的
@Masako: 但是return get_ydm_number() 每次返回的值又是正确的
@弃身锋刃:
你把
else:
print ("又开始了get_validate_text 获取验证码了")
get_validate_text(proxies, vt_time, num)
改成
else:
print ("又开始了get_validate_text 获取验证码了")
return get_validate_text(proxies, vt_time, num)
就可以了
@弃身锋刃: 你的else只是递归调用了自己,没有设置返回值
@Masako: 666,你是对的,我没看出来
周末愉快^_^
@Masako: 感谢!
....这个排版惨不忍睹
后面的删了 ,不用看 , 不小心复制到的...