# 亲测有效,仅供学习~ (Tested and working; for learning purposes only.)
"""Print titles and ratings from Douban's high-score movie listing.

The script asks how many result pages to fetch, downloads each page from the
``search_subjects`` endpoint, extracts every ``"rate"`` / ``"title"`` value
with regular expressions and prints them as a ranked list.
"""
import re
from urllib import request

# Browser-like User-Agent sent with every request.
header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"}

# Example page URLs -- only ``page_start`` changes, in steps of 20:
# https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=0
# https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=20
# https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start=40
BASE_URL = "https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=recommend&page_limit=20&page_start="
PAGE_SIZE = 20

# Sample fragment of a response body:
#   "rate":"9.3","cover_x":1100,"title":"海上钢琴师"
patten_score = r'"rate":"(.*?)"'
patten_name = r'"title":"(.*?)"'


def build_url(page_index: int) -> str:
    """Return the listing URL for the zero-based page *page_index*."""
    return BASE_URL + str(page_index * PAGE_SIZE)


def parse_page(resp: str) -> list:
    """Extract ``(title, rate)`` string pairs from one response body.

    Titles and rates are matched independently and then paired by position.
    ``zip`` stops at the shorter list, so a body with unequal match counts
    yields fewer pairs instead of raising ``IndexError``.
    """
    score = re.findall(patten_score, resp)
    movie_name = re.findall(patten_name, resp)
    return list(zip(movie_name, score))


def fetch_page(page_index: int, timeout: float = 10) -> list:
    """Download one listing page and return its ``(title, rate)`` pairs.

    Args:
        page_index: Zero-based page number.
        timeout: Seconds to wait on the connection before giving up.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    req = request.Request(build_url(page_index), headers=header)
    # The context manager closes the connection even if decoding fails.
    with request.urlopen(req, timeout=timeout) as response:
        resp = response.read().decode()
    return parse_page(resp)


def main() -> None:
    """Prompt for a page count, fetch that many pages and print the ranking."""
    page = int(input("请输入需要爬取的页数"))

    movies = []
    for i in range(page):
        movies.extend(fetch_page(i))

    for rank, (name, score) in enumerate(movies, start=1):
        print("热度排名第", str(rank), " 电影名称:", str(name), ",电影评分:", str(score))


if __name__ == "__main__":
    main()