07 爬取知乎张佳玮文章

    技术2024-10-13  50

    # Author:Nimo_Ding
    """Scrape Zhang Jiawei's Zhihu article list and save title, link and excerpt.

    Every article is printed, appended as a labelled text record to
    ``zjw.csv`` and added as a row to the ``zjw_articles`` sheet of
    ``zjw.xlsx``.
    """
    import csv  # kept from the original script; not used below

    import openpyxl
    import requests
    from bs4 import BeautifulSoup  # kept from the original script; not used below

    # Sending explicit headers is a good habit.
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    url = 'https://www.zhihu.com/api/v4/members/zhang-jia-wei/articles?'

    PAGE_COUNT = 2        # number of pages requested
    PAGE_SIZE = 20        # articles per request; also the offset step
    REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever

    INCLUDE_FIELDS = 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics'

    # One record as it is printed and written to zjw.csv.
    RECORD_TEMPLATE = '标题为:{}\n链接为:{}\n摘要为:{}\n'


    def fetch_articles(offset):
        """Return the ``data`` list of one page of the article API.

        Args:
            offset: value sent as the ``offset`` query parameter.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            requests.Timeout: if no response arrives within REQUEST_TIMEOUT.
        """
        params = {
            'include': INCLUDE_FIELDS,
            'offset': offset,
            'limit': PAGE_SIZE,
            'sort_by': 'created',
        }
        res = requests.get(url, params=params, headers=headers,
                           timeout=REQUEST_TIMEOUT)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
        res.raise_for_status()
        return res.json()['data']


    def main():
        """Download PAGE_COUNT pages and write zjw.csv and zjw.xlsx."""
        wb = openpyxl.Workbook()
        sheet = wb.active
        sheet.title = 'zjw_articles'
        sheet['A1'] = '标题'
        sheet['B1'] = '链接'
        sheet['C1'] = '摘要'

        # NOTE: despite its extension, zjw.csv holds labelled plain-text records,
        # not comma-separated values; the format is kept as the script wrote it.
        # The with-block closes the file even if a request or key lookup fails.
        with open('zjw.csv', 'w', encoding='utf-8') as f:
            for page in range(PAGE_COUNT):
                # First page uses offset 20, second page offset 40.
                # NOTE(review): offset 0 is never requested, so the first
                # PAGE_SIZE entries of the listing are presumably skipped;
                # confirm this is intended.
                offset = (page + 1) * PAGE_SIZE
                for article in fetch_articles(offset):
                    record = RECORD_TEMPLATE.format(
                        article['title'], article['url'], article['excerpt'])
                    print(record)
                    f.write(record)
                    sheet.append(
                        [article['title'], article['url'], article['excerpt']])

        wb.save('zjw.xlsx')


    if __name__ == '__main__':
        main()

     

    Processed: 0.013, SQL: 9