python 某美电商平台爬虫

    技术2023-08-15  68

    def _field(record, key, default=""):
        """Return ``record[key]``, or ``default`` when the lookup fails."""
        try:
            return record[key]
        except (KeyError, IndexError, TypeError):
            return default


    def _first(record, key, default=""):
        """Return ``record[key][0]``, or ``default`` when either lookup fails."""
        try:
            return record[key][0]
        except (KeyError, IndexError, TypeError):
            return default


    def run(q, searchType, page):
        """Fetch one page of Gome search results and normalise each product.

        Args:
            q: Search keyword, interpolated into the ``question`` query parameter.
            searchType: Value for the ``searchType`` query parameter.
            page: Value for the ``page`` query parameter.

        Returns:
            A dict with keys ``"date"`` (list of per-product dicts),
            ``"totalPage"``, ``"totalCount"`` and ``"strue"`` (always ``True``).
            The key spellings are kept exactly as callers already receive them.

        Raises:
            KeyError: If the response lacks ``content.prodInfo.products`` or
                ``content.pageBar``, or a product lacks ``alt``, ``promoDesc``
                or ``pId``. All other product fields fall back to ``""``.
        """
        headermap = guomeiheader.guomei()
        # NOTE(review): q is interpolated as-is, without URL-encoding here --
        # confirm whether callers or mayi3.daili take care of escaping.
        search_url = 'http://search.gome.com.cn/search?question=%s&searchType=%s&page=%s&bws=0&type=json&rank=1&search_mode=normal' % (q, searchType, page)
        response_json = mayi3.daili(
            search_url,
            headermap.get("headers"),
            'UTF-8',
            headermap.get("mayi_proxy"),
        ).json()

        content = response_json["content"]
        products = content["prodInfo"]["products"]
        page_bar = content["pageBar"]
        total_page = page_bar["totalPage"]
        total_count = page_bar["totalCount"]

        items = []
        for product in products:
            # Required fields: a missing key raises KeyError, as before.
            title = product["alt"]             # title
            promo_desc = product["promoDesc"]  # promotional (red) hint text
            product_id = product["pId"]        # product id

            # Optional fields default to an empty string when absent.
            city_name = _field(product, "cityName")  # ship-from location
            shop_url = _field(product, "mUrl")       # shop home page
            sku_id = _field(product, "skuId")        # sku id
            shop_name = _field(product, "sName")     # shop name
            main_image = _field(product, "sImg")     # main image

            # Product page link, built from the product id and the sku id.
            item_url = "http://item.gome.com.cn/" + str(product_id) + '-' + str(sku_id) + '.html'

            items.append({
                "title": title,           # title
                "item_loc": city_name,    # location
                "wangwang": shop_name,    # seller name (same value as shopName)
                "raw_title": promo_desc,  # selling point
                "url": item_url,          # product link
                "shopLink": shop_url,     # shop link
                "pic_url": main_image,    # main image
                "pingtaiid": 8,           # platform id
                "shopName": shop_name,    # shop name
                "itemId": product_id,     # product id
            })

        return {
            "date": items,
            "totalPage": total_page,
            "totalCount": total_count,
            "strue": True,
        }


    # Gome product detail lookup
    def dataItem(url):
        """Fetch the detail record for one Gome product page.

        Args:
            url: Product link; ``url_data.guomeilianjieurl`` extracts the two
                identifiers (pid and skuid, per the original comment) that are
                placed into the detail endpoint path.

        Returns:
            A dict with keys ``groupProps``, ``category``, ``shop_id``,
            ``view_price``, ``pic_url``, ``props``, ``title``, ``shopLink``,
            ``itemId`` and ``shopInfo``. Any field that cannot be read from the
            response is returned as ``""``.
        """
        # Extract the pid and skuid from the product link.
        urldata = url_data.guomeilianjieurl(url)
        detail_url = 'http://ss.gome.com.cn/item/v1/d/m/store/unite/%s/%s/N/22010200/220102001/1/null/flag/item' \
                     % (urldata[0], urldata[1])
        headermap = guomeiheader.dataItem()
        item_json = mayi3.daili(
            detail_url,
            headermap.get("headers"),
            'UTF-8',
            headermap.get("mayi_proxy"),
        ).json()

        try:
            shop_link = url_data.urlheaderyanzheng(item_json["shopLink"][0])
        except Exception:
            # Best-effort, as in the original: a missing link or a failure in
            # the external helper yields an empty string, but interpreter-level
            # signals such as KeyboardInterrupt are no longer swallowed.
            shop_link = ""

        return {
            "groupProps": _field(item_json, "groupProps"),
            "category": _field(item_json, "category"),
            "shop_id": _first(item_json, "shop_id"),
            "view_price": _field(item_json, "view_price"),
            "pic_url": _first(item_json, "pic_url"),
            "props": _first(item_json, "props"),
            "title": _field(item_json, "title"),
            "shopLink": shop_link,
            "itemId": _first(item_json, "itemId"),
            "shopInfo": _first(item_json, "shopInfo"),
        }
    Processed: 0.009, SQL: 9