from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from pandas import DataFrame, concat
import time

# One Chrome session is shared by every page visit below.
# NOTE(review): nothing here calls browse.quit(); the browser window stays
# open after the script finishes. Confirm whether that is intended.
browse = webdriver.Chrome()

# XPath locators for the pieces of the room page that are scraped.
# The title locator originally lacked the "*" node test ("//[@id=...]"),
# which is not valid XPath; it now matches the form of the other locators.
_TITLE_XPATH = '//*[@id="js-player-title"]/div/div[2]/div[1]/div/div[1]/h3'
_BARRAGE_XPATH = '//*[@id="js-player-barrage"]'
_FOLLOWERS_XPATH = (
    '//*[@id="js-player-title"]/div/div[2]/div[1]/div[1]/div/div[2]/span'
)
_VIP_XPATH = (
    '//*[@id="js-player-asideMain"]/div/div[1]/div[3]/div/div/div/div[1]'
    '/ul/li[2]/div/em'
)


def web(www, wait_seconds=20):
    """Load one room page and return its scraped fields as a one-row frame.

    Args:
        www: URL of the page to open in the shared ``browse`` session.
        wait_seconds: Fixed delay, in seconds, to sleep after requesting the
            page and before reading elements. Defaults to 20.

    Returns:
        A ``DataFrame`` with a single row and the columns '标题' (title text),
        '弹幕量' (number of whitespace-separated tokens in the barrage panel
        text), '关注人数' (follower-count text) and '贵宾人数' (VIP-count text).

    Raises:
        Whatever the Selenium driver raises when the page cannot be loaded
        or one of the elements cannot be located.
    """
    browse.get(www)
    # Fixed sleep gives the page time to render before elements are read.
    time.sleep(wait_seconds)

    title = browse.find_element(By.XPATH, _TITLE_XPATH).text

    # The barrage panel text is split on whitespace and the token count is
    # recorded as the barrage volume.
    barrage_text = browse.find_element(By.XPATH, _BARRAGE_XPATH).text
    barrage_count = len(barrage_text.split())

    followers = browse.find_element(By.XPATH, _FOLLOWERS_XPATH).text
    vip_count = browse.find_element(By.XPATH, _VIP_XPATH).text

    return DataFrame(
        {
            '标题': [title],
            '弹幕量': [barrage_count],
            '关注人数': [followers],
            '贵宾人数': [vip_count],
        }
    )


# NOTE(review): gg5 is not defined in this part of the file; presumably it is
# an iterable of room URL paths (each is appended to the site root) — confirm.
scraped_pages = []
for room_path in gg5:
    room_url = 'https://www.douyu.com{}'.format(room_path)
    try:
        scraped_pages.append(web(room_url))
    except Exception as exc:
        # Best-effort scrape: report the failing room and keep going rather
        # than silently discarding the error.
        print('skipped {}: {!r}'.format(room_url, exc))

# concat() rejects an empty list, so fall back to an empty frame when no
# page was scraped successfully.
gg = concat(scraped_pages, ignore_index=True) if scraped_pages else DataFrame()

# Raw string keeps the Windows backslashes literal.
gg.to_excel(r"C:\英雄时刻\Python\新建文件夹 (2)\新建 XLSX 工作表.xlsx")
