from selenium import webdriver
import time
class Douyu(object):
    """Scrape the room listing at ``https://www.douyu.com/directory/all``.

    A Chrome WebDriver session is opened on construction. ``run`` walks the
    listing page by page, printing one dict per listed room, and closes the
    browser when it finishes or fails.
    """

    # Result key -> XPath relative to one listing card. The keys suggest
    # category, streamer name, room title and viewer count, but that meaning
    # is inferred from the names only -- TODO confirm against the live page.
    _FIELD_XPATHS = (
        ('tp', './a[1]/div[2]/div[1]/span'),
        ('owner', './a[1]/div[2]/div[1]/h3'),
        ('room', './a[1]/div[2]/div[2]/h2'),
        ('num', './a[1]/div[2]/div[2]/span'),
    )

    def __init__(self):
        """Store the listing URL and start a Chrome WebDriver session."""
        self.url = 'https://www.douyu.com/directory/all'
        self.driver = webdriver.Chrome()

    def parse_data(self):
        """Extract one dict per listing card from the currently loaded page.

        Prints the number of cards found, then reads the text of the four
        elements named in ``_FIELD_XPATHS`` from each card.

        Returns:
            A list of dicts with the keys ``tp``, ``owner``, ``room`` and
            ``num``, in page order.

        Raises:
            Whatever the driver raises when a card lacks one of the expected
            child elements; nothing is caught here.
        """
        # Fixed pause before querying -- presumably to let the page render.
        time.sleep(5)
        # find_elements("xpath", ...) replaces find_elements_by_xpath, which
        # Selenium 4 removed. "xpath" is the value of By.XPATH, so no extra
        # import is needed and Selenium 3 accepts the same call.
        page = self.driver.find_elements(
            'xpath', '//*[@id="listAll"]/section[2]/div[2]/ul/li/div')
        print(len(page))
        time.sleep(5)
        return [
            {key: card.find_element('xpath', path).text
             for key, path in self._FIELD_XPATHS}
            for card in page
        ]

    def save_data(self, data_list):
        """Print every record in ``data_list`` on its own line.

        Args:
            data_list: Iterable of records, as returned by ``parse_data``.
        """
        for data in data_list:
            print(data)

    def run(self):
        """Scrape every listing page, then quit the browser.

        For each page: scroll down, parse and print the records, then click
        the element whose text contains "下一页" (next page). The loop ends
        the first time that look-up or click raises.
        """
        try:
            self.driver.get(self.url)
            while True:
                # Scroll far down before parsing -- presumably so that
                # lazily loaded cards are present in the DOM.
                self.driver.execute_script('scrollTo(0,100000)')
                self.save_data(self.parse_data())
                try:
                    self.driver.find_element(
                        'xpath', '//*[contains(text(),"下一页")]').click()
                except Exception:
                    # Was a bare ``except:``; narrowed so KeyboardInterrupt
                    # and SystemExit propagate instead of being swallowed.
                    break
        finally:
            # Always release the browser, including on error or Ctrl-C.
            self.driver.quit()
if __name__ == "__main__":
    # Script entry point: build the scraper and walk the whole listing.
    spider = Douyu()
    spider.run()
# Original article: https://ipadbbs.8miu.com/read-51498.html (please credit this address when reposting).