Python 京东爬虫

    技术2023-08-07  77

    def home(q, page):
        """Fetch one page of JD mobile search results for a keyword.

        Builds the ``so.m.jd.com`` search URL for ``q`` and ``page`` (the
        URL requests 60 items per page and a JSONP callback named ``j``),
        downloads it via ``mayi3.sousuojson`` using the headers and proxy
        returned by ``jdheader.jdhome()``, cleans the response text with the
        ``str_manage`` helpers, parses it with ``str_manage.strJson`` and
        flattens every product entry into a small dict.

        Args:
            q: Search keyword. It is converted with ``str()`` and inserted
                into the query string as-is.
            page: Page number, converted with ``str()``.

        Returns:
            A dict with two keys: ``'date'`` holds the list of product dicts
            (keys ``title``, ``url``, ``itemId``, ``xinghao``, ``xinghaoid``,
            ``pic_url``, ``view_price``, ``shopNames``, ``id``) and ``'page'``
            holds ``data.searchm.Head.Summary`` from the response.

        Raises:
            Whatever the fetch/parse helpers raise, and ``KeyError`` /
            ``TypeError`` if the parsed response lacks
            ``data.searchm.Paragraph`` or ``data.searchm.Head.Summary``.
            Errors on an individual product entry are not raised: the entry
            is skipped and a message is printed.
        """
        # NOTE(review): the keyword is not percent-encoded here. If callers
        # pass raw text containing '&', '#' or spaces, consider
        # urllib.parse.quote -- confirm first that callers do not already
        # encode it, to avoid double encoding.
        URL = (
            f'https://so.m.jd.com/ware/search._m2wq_list?keyword={q!s}'
            f'&datatype=1&callback=j&page={page!s}&pagesize=60&ext_attr=no'
        )
        headermap = jdheader.jdhome()
        statuses = mayi3.sousuojson(
            URL, headermap.get("headers"), headermap.get("mayi_proxy")
        ).text

        # Presumably strips whitespace/newlines and then removes the JSONP
        # wrapper "j(" ... ")" -- the helpers are defined elsewhere; verify.
        statuses = str_manage.quchukonggexiehang(statuses)
        statuses = str(str_manage.tihuan(statuses, 'j(', '')).replace(
            "<!--csend--><html><head></head><body></body></html>", "")
        statuses = str(str_manage.tihuan(statuses, ')', '')).replace(
            "<!--csstart-->", "")

        jddatajson = str_manage.strJson(statuses)
        searchm = jddatajson['data']['searchm']
        data = searchm['Paragraph']
        summary = searchm['Head']['Summary']

        data_list = []
        for entry in data:
            # Guard only the field extraction: a malformed entry is skipped,
            # while interrupts and unrelated errors are no longer swallowed.
            try:
                content = entry['Content']
                price = entry['dredisprice']
                product = {
                    "title": content['warename'],  # product title
                    # wareid is the SKU id; concatenation raises TypeError
                    # (and the entry is skipped) if it is not a string.
                    "url": "https://item.jd.com/" + entry['wareid'] + ".html",
                    # NOTE(review): "itemId" is filled with the price, not
                    # the SKU id -- looks like a copy/paste slip; kept
                    # because callers may rely on it. Confirm.
                    "itemId": price,
                    "xinghao": content['CustomAttrList'],  # model attributes
                    "xinghaoid": entry['vender_id'],  # vendor id
                    "pic_url": content['imageurl'],  # main image
                    "view_price": price,  # price
                    "shopNames": entry['shop_name'],  # shop name
                    "id": entry['shop_id'],  # shop id
                }
            except (KeyError, TypeError, IndexError):
                print("解析或者添加有错")
                continue
            data_list.append(product)

        # NOTE(review): the key is spelled 'date' (probably meant 'data');
        # it is part of the returned shape, so it is left unchanged.
        result = {
            'date': data_list,
            'page': summary
        }
        return result
    Processed: 0.010, SQL: 9