import requests
import xlwt

rowNum = 0

#### Scrape dajie.com job listings for a keyword such as Python or Java, and write them to an xls spreadsheet

# Request job listings from dajie.com
def main(url):
    firstUrl = 'https://so.dajie.com/job/search'
    session = requests.session()
    session.get(firstUrl)
    session.headers['referer'] = firstUrl
    response = session.get(url)
    return response.json()["data"]
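
# Why the extra GET above: requesting firstUrl first collects the session
# cookies and lets us send the search page as the Referer, which the AJAX
# endpoint appears to require before it returns JSON. A slightly hardened
# fetch would add a timeout and a status check (a sketch; the timeout value
# is an assumption, not part of the original):
#
#     response = session.get(url, timeout=10)
#     response.raise_for_status()
#     return response.json()["data"]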

# Write the job title, salary, company, and location from each JSON record into the sheet
def writeXls(sht1, data, total, rowNum):
    for i in data:
        if rowNum < int(total):
            rowNum = rowNum + 1
            print(rowNum, i['jobName'], i['salary'], i['compName'], i['pubCity'])
            sht1.write(rowNum, 0, rowNum)
            sht1.write(rowNum, 1, i['jobName'])
            sht1.write(rowNum, 2, i['salary'])
            sht1.write(rowNum, 3, i['compName'])
            sht1.write(rowNum, 4, i['pubCity'])
    return rowNum
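
# Caveat: the legacy .xls format that xlwt produces caps a worksheet at
# 65536 rows, so rowNum must stay below that; a very large search result
# would overflow the sheet. (General .xls/xlwt limit, noted here as a
# heads-up, not something the original script guards against.)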

# Create the xls workbook
def createXls():
    xls = xlwt.Workbook()
    return xls

# Create a sheet in the workbook, named after the search keyword
def createSheet(xls, keyTxt):
    sht1 = xls.add_sheet(keyTxt)
    # Column headers (in Chinese): No., Job title, Salary, Company, Location
    sht1.write(0, 0, '序号')
    sht1.write(0, 1, '职位')
    sht1.write(0, 2, '工资')
    sht1.write(0, 3, '公司')
    sht1.write(0, 4, '地点')
    return sht1
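
# Caveat: Excel sheet names are limited to 31 characters and may not contain
# characters such as [ ] : * ? / \, and xlwt validates the name, so an
# unusual keyword could make add_sheet raise. (General xlwt/Excel behavior,
# noted here as a heads-up.)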

# Save the workbook to disk
def saveXls(xls):
    xls.save('/Users/User/Downloads/mydata.xls')
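
# Optional refactor sketch (not used below, kept for illustration): the two
# hand-concatenated search URLs in __main__ could instead be built with
# urllib.parse.urlencode, which also percent-escapes the keyword.
# buildSearchUrl is a hypothetical helper, not part of the original script;
# the import sits here only to keep the sketch self-contained.
from urllib.parse import urlencode

def buildSearchUrl(keyTxt, page):
    params = {'keyword': keyTxt, 'order': 0, 'city': '', 'recruitType': '',
              'salary': '', 'experience': '', 'page': page,
              'positionFunction': '', '_CSRFToken': '', 'ajax': 1}
    return 'https://so.dajie.com/job/ajax/search/filter?' + urlencode(params)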

if __name__ == '__main__':
    keyTxt = 'Java'
    url1 = ('https://so.dajie.com/job/ajax/search/filter?keyword=' + keyTxt +
            '&order=0&city=&recruitType=&salary=&experience=&page=1'
            '&positionFunction=&_CSRFToken=&ajax=1')
    data1 = main(url1)
    totalPage = data1["totalPage"]  # number of result pages for keyTxt, used to crawl the remaining pages below
    xls = createXls()
    sheet = createSheet(xls, keyTxt)
    rowNum = writeXls(sheet, data1["list"], data1["total"], rowNum)
    print('rows written after page 1:', rowNum)
    # Pages 2..totalPage: fetch each remaining page and append its rows
    for i in range(totalPage - 1):
        page = str(i + 2)
        urlNext = ('https://so.dajie.com/job/ajax/search/filter?keyword=' + keyTxt +
                   '&order=0&city=&recruitType=&salary=&experience=&page=' + page +
                   '&positionFunction=&_CSRFToken=&ajax=1')
        print(urlNext)
        data = main(urlNext)  # fetch this page's listings
        rowNum = writeXls(sheet, data["list"], data["total"], rowNum)  # append the scraped rows to the sheet
    saveXls(xls)
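
# Note: the loop above issues one request per page with no pause. If the site
# throttles rapid requests, adding a short delay between pages is a common
# courtesy (a suggestion, not part of the original script):
#
#     import time
#     time.sleep(1)  # inside the loop, before each main(urlNext) call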