import json
import uuid
import warnings
from urllib.parse import quote

# Silence warnings before the third-party imports below so that any
# import-time warnings they emit stay suppressed as well.
warnings.filterwarnings("ignore")

import requests
from pyquery import PyQuery as pq
from sqlalchemy import create_engine, text
# Connection settings for the MySQL database (reached through the pymysql
# driver) that this script writes to.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file kept out of version control.
user = 'root'
pwd = 'a123456'
host = 'localhost'
port = '3306'
db = 'jddata'

# The user name and password are percent-encoded so that reserved URL
# characters (for example "@", ":" or "/") in either value cannot corrupt the
# connection URL. For the current values the resulting URL is unchanged.
sqlurl = 'mysql+pymysql://{user}:{pwd}@{host}:{port}/{db}?charset=utf8'.format(
    user=quote(user, safe=''),
    pwd=quote(pwd, safe=''),
    host=host,
    port=port,
    db=db,
)

# Shared engine used by the rest of the script to run statements.
engine = create_engine(sqlurl)
def _category_url(target):
    """Return the product URL derived from a level-3 category target.

    ``target`` is the first ``|``-separated field of a level-3 node name.

    * If it contains ``-`` it is turned into a ``list.jd.com`` listing URL,
      with every dash replaced by a comma in the ``cat`` query value.
    * Otherwise, if it contains ``html`` it is prefixed with ``https://``.
    * Anything else yields an empty string.
    """
    if "-" in target:
        return "https://list.jd.com/list.html?cat=" + target.replace("-", ",")
    if "html" in target:
        return "https://" + target
    return ""


# Parameterized insert: values are bound by the driver instead of being
# spliced into the SQL text, so quotes in the data cannot break (or inject
# into) the statement.
_INSERT_LEVEL = text(
    "insert into jdlevel(id,level2,level2_id,product_url) "
    "values(:id,:level2,:level2_id,:url)"
)

# The file is only read, so open it read-only and close it deterministically.
# json.load() no longer accepts an ``encoding`` argument (removed in
# Python 3.9); the decoding is already handled by open().
with open('index.json', 'r', encoding='utf8') as f:
    obj = json.load(f)

# Running row id, also used as the primary ``id`` value of each inserted row.
index = 1
for root_node in obj["data"]:
    for branch in root_node["s"]:
        # Skip branches whose child list is empty or missing a value.
        if not branch["s"]:
            continue
        for parent in branch["s"]:
            # NOTE(review): assumes "n" holds at least two "|"-separated
            # fields; the second one is stored as ``level2``.
            level2 = str(parent["n"]).split("|")[1]
            for leaf in parent["s"]:
                level3 = str(leaf["n"]).split("|")
                url = _category_url(level3[0])

                # Generate the id once so the logged value is the one that
                # is actually stored.
                level2_id = str(uuid.uuid1())

                # Human-readable preview of the row (display only; the real
                # statement below uses bound parameters).
                print("insert into jdlevel(id,level2,level2_id,product_url) values({id},'{level2}','{level2_id}','{url}')".format(id=index, level2=level2, level2_id=level2_id, url=url))

                # One transaction per row keeps the original behaviour of
                # committing each insert independently, while using the
                # connection API available in both SQLAlchemy 1.x and 2.x.
                with engine.begin() as conn:
                    conn.execute(
                        _INSERT_LEVEL,
                        {
                            "id": int(index),
                            "level2": level2,
                            "level2_id": level2_id,
                            "url": url,
                        },
                    )

                # The level-3 display name is only used for this progress
                # line; fall back to "" when the node name has no "|".
                level3_name = level3[1] if len(level3) > 1 else ""
                print(str(index) + "---" + level2 + "----" + level3_name + "---" + url)
                index = index + 1