# Note: most of the code below is annotated with comments.
import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By


class Music:
    """Scrape playlist titles and authors for every NetEase Cloud Music category.

    A Chrome session is opened on construction.  ``run`` drives the whole
    crawl and the session is closed when the crawl ends, whether it
    succeeded or failed.
    """

    # Name of the iframe that is switched into before any element lookup.
    CONTENT_FRAME = "contentFrame"
    # Locator of the "next page" link; pagination stops once nothing matches.
    NEXT_PAGE_XPATH = "//a[@class='zbtn znxt']"

    def __init__(self):
        self.driver = webdriver.Chrome()
        # Landing page listing all playlist categories on NetEase Cloud Music.
        self.url = "https://music.163.com/#/discover/playlist/"

    # NOTE: the misspelled method name is kept so existing callers keep working.
    def go_homeoage(self):
        """Open the playlist landing page and click the category toggle.

        Raises:
            IndexError: if no element with id ``cateToggleLink`` is found.
        """
        self.driver.get(self.url)
        self.driver.switch_to.frame(self.CONTENT_FRAME)
        self.driver.find_elements(By.ID, "cateToggleLink")[0].click()

    def get_url_list(self):
        """Collect the name and link of every category in the category box.

        Prints the number of ``<dd>`` groups found.

        Returns:
            A list of ``{"category": <link text>, "href": <link url>}`` dicts,
            one per ``<a>`` found under ``//div[@id='cateListBox']//dd``.
        """
        url_list = []
        dd_list = self.driver.find_elements(
            By.XPATH, "//div[@id='cateListBox']//dd"
        )
        print(len(dd_list))
        for dd in dd_list:
            for link in dd.find_elements(By.XPATH, ".//a"):
                url_list.append(
                    {
                        "category": link.text,
                        "href": link.get_attribute("href"),
                    }
                )
        return url_list

    def _find_next_page(self):
        """Return the first "next page" link element, or None if none matches."""
        candidates = self.driver.find_elements(By.XPATH, self.NEXT_PAGE_XPATH)
        return candidates[0] if candidates else None

    def go_every_url_list(self, url_list):
        """Visit each category URL and scrape all of its pages.

        Args:
            url_list: dicts as returned by ``get_url_list``; only the
                ``"href"`` key is read.

        The browser is always closed when this method exits, including when
        an exception interrupts the crawl.
        """
        try:
            for url in url_list:
                self.driver.get(url["href"])
                self.driver.switch_to.frame(self.CONTENT_FRAME)
                self.get_content_list()

                # Keep following the "next page" link while one exists.
                next_page = self._find_next_page()
                while next_page is not None:
                    # The next-page button is covered by another element, so
                    # it is clicked through JavaScript instead of .click().
                    self.driver.execute_script(
                        'arguments[0].click()', next_page
                    )
                    self.get_content_list()
                    next_page = self._find_next_page()
        finally:
            self.driver.quit()

    def get_content_list(self):
        """Scrape the playlists shown on the current page and print them.

        Returns:
            The printed list of ``{"title": ..., "author": ...}`` dicts, one
            per ``<li>`` under ``//ul[@class='m-cvrlst f-cb']``.
        """
        li_list = self.driver.find_elements(
            By.XPATH, "//ul[@class='m-cvrlst f-cb']//li"
        )
        content_list = []
        for li in li_list:
            content_list.append(
                {
                    "title": li.find_element(
                        By.XPATH, "./p[@class='dec']/a"
                    ).text,
                    "author": li.find_element(
                        By.XPATH, ".//a[@class='nm nm-icn f-thide s-fc3']"
                    ).text,
                }
            )
        print(content_list)
        return content_list

    def run(self):
        """Run the full crawl: open categories, collect URLs, scrape each one."""
        try:
            self.go_homeoage()
            url_list = self.get_url_list()
        except Exception:
            # go_every_url_list closes the browser itself; close it here when
            # the crawl fails before reaching that step, then re-raise.
            self.driver.quit()
            raise
        self.go_every_url_list(url_list)


if __name__ == '__main__':
    mus = Music()
    mus.run()