import time

from selenium import webdriver
from selenium.webdriver.common.by import By
class JdSpider(object):
    """Scrape JD.com search results for the hard-coded query '爬虫书'.

    Workflow: open the home page, submit the search, scroll each result
    page to trigger lazy loading, print every item found, and follow the
    "next page" button until it is disabled.
    """

    def __init__(self):
        self.url = 'https://www.jd.com/'
        self.browser = webdriver.Chrome()
        # Running count of items printed so far.
        self.i = 0

    def get_html(self):
        """Open the JD home page, type the query and click the search button."""
        search_box = '//*[@id="key"]'
        search_button = '//*[@id="search"]/div/div[2]/button'
        self.browser.get(self.url)
        # Selenium 4 removed find_element_by_xpath; use find_element(By.XPATH, ...).
        self.browser.find_element(By.XPATH, search_box).send_keys('爬虫书')
        self.browser.find_element(By.XPATH, search_button).click()
        # Crude fixed wait for the results page to render.
        # NOTE(review): a WebDriverWait on the result list would be more robust.
        time.sleep(3)

    def parse_html(self):
        """Scroll to the page bottom (loads lazy items), then print each result.

        Each item is a dict with keys 'price', 'name', 'comment', 'market'
        taken from the product card's sub-elements.
        """
        self.browser.execute_script(
            'window.scrollTo(0,document.body.scrollHeight)'
        )
        # Wait for lazily loaded items to appear after the scroll.
        time.sleep(3)
        li_list = self.browser.find_elements(
            By.XPATH, '//*[@id="J_goodsList"]/ul/li'
        )
        for li in li_list:
            # Build a fresh dict per product so records never share state.
            item = {
                'price': li.find_element(
                    By.XPATH, './/div[@class="p-price"]').text.strip(),
                'name': li.find_element(
                    By.XPATH, './/div[@class="p-name"]/a/em').text.strip(),
                'comment': li.find_element(
                    By.XPATH, './/div[@class="p-commit"]/strong').text.strip(),
                'market': li.find_element(
                    By.XPATH, './/div[@class="p-shopnum"]').text.strip(),
            }
            print(item)
            self.i += 1

    def run(self):
        """Search, then page through every result page; always close the browser.

        The 'pn-next disabled' marker in the page source signals the last
        page; until it appears, click the 'pn-next' button to advance.
        """
        try:
            self.get_html()
            while True:
                self.parse_html()
                if self.browser.page_source.find('pn-next disabled') == -1:
                    self.browser.find_element(By.CLASS_NAME, 'pn-next').click()
                    time.sleep(3)
                else:
                    break
            print("爬取数量:", self.i)
        finally:
            # Release the browser/driver even if a selector breaks mid-run
            # (the original leaked a Chrome process on any exception).
            self.browser.quit()
if __name__ == '__main__':
    # Script entry point: build the spider and crawl all result pages.
    JdSpider().run()
# Reprinted work — please credit the original source:
# https://ipadbbs.8miu.com/read-16799.html