爬取大街网keyword为python或Java等招聘信息,并写入到xls表格中保存

    技术2026-01-28  7

    """Scrape Dajie job postings for a keyword (e.g. Python or Java) into an .xls workbook.

    The script requests the Dajie ajax search endpoint page by page and writes
    the job name, salary, company and city of every posting to one sheet.
    """
    from urllib.parse import urlencode

    import requests
    import xlwt

    # Number of data rows written so far; sheet row 0 is reserved for the header.
    rowNum = 0

    _SEARCH_PAGE_URL = 'https://so.dajie.com/job/search'
    _SEARCH_API_URL = 'https://so.dajie.com/job/ajax/search/filter'
    # Seconds to wait for the server; requests waits indefinitely when no timeout is given.
    _REQUEST_TIMEOUT = 10
    _DEFAULT_XLS_PATH = '/Users/User/Downloads/mydata.xls'
    # JSON keys of one posting, in the order of sheet columns 1..4.
    _JOB_FIELDS = ('jobName', 'salary', 'compName', 'pubCity')


    def _openSession():
        """Return a session that has visited the search page and sends it as referer.

        NOTE(review): the initial visit presumably collects the cookies the ajax
        endpoint expects -- confirm against the site's behaviour.
        """
        session = requests.Session()
        try:
            session.get(_SEARCH_PAGE_URL, timeout=_REQUEST_TIMEOUT)
        except Exception:
            # Do not leak the connection pool when the priming request fails.
            session.close()
            raise
        session.headers['referer'] = _SEARCH_PAGE_URL
        return session


    def _buildSearchUrl(keyword, page):
        """Build the ajax search URL for ``keyword`` and 1-based ``page``.

        The keyword is percent-encoded, so non-ASCII text or reserved characters
        such as ``+`` or ``&`` cannot corrupt the query string.
        """
        query = urlencode([
            ('keyword', keyword),
            ('order', 0),
            ('city', ''),
            ('recruitType', ''),
            ('salary', ''),
            ('experience', ''),
            ('page', page),
            ('positionFunction', ''),
            ('_CSRFToken', ''),
            ('ajax', 1),
        ])
        return '{}?{}'.format(_SEARCH_API_URL, query)


    def main(url, session=None):
        """Fetch one search result page and return the ``"data"`` member of its JSON.

        Args:
            url: Full ajax search URL to request.
            session: Optional session prepared by ``_openSession``. When omitted,
                a temporary session is created for this call and closed afterwards.

        Returns:
            The value stored under ``"data"`` in the JSON response.

        Raises:
            requests.HTTPError: The server answered with an error status.
            requests.RequestException: The request failed or timed out.
        """
        ownsSession = session is None
        if ownsSession:
            session = _openSession()
        try:
            response = session.get(url, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()["data"]
        finally:
            if ownsSession:
                session.close()


    def writeXls(sht1, data, total, rowNum):
        """Append postings to the sheet and return the updated row counter.

        Args:
            sht1: Worksheet to write to.
            data: Postings of one result page; each must provide the keys in
                ``_JOB_FIELDS``. ``None`` or an empty list writes nothing.
            total: Total number of postings reported by the site; no more than
                this many rows are ever written.
            rowNum: Number of data rows already written.

        Returns:
            The number of data rows written after this call.
        """
        if not data:
            return rowNum
        limit = int(total)
        for job in data:
            if rowNum >= limit:
                break
            values = [job[key] for key in _JOB_FIELDS]
            rowNum += 1
            print(rowNum, *values)
            # Column 0 carries the running index, the job fields follow.
            sht1.write(rowNum, 0, rowNum)
            for col, value in enumerate(values, start=1):
                sht1.write(rowNum, col, value)
        return rowNum


    def createXls():
        """Create and return an empty workbook."""
        return xlwt.Workbook()


    def createSheet(xls, keyTxt):
        """Add a sheet named ``keyTxt`` to ``xls``, write the header row and return it."""
        sht1 = xls.add_sheet(keyTxt)
        # Header cells: index, position, salary, company, location.
        for col, title in enumerate(('序号', '职位', '工资', '公司', '地点')):
            sht1.write(0, col, title)
        return sht1


    def saveXls(xls, path=_DEFAULT_XLS_PATH):
        """Save the workbook to ``path`` (defaults to the original fixed location)."""
        xls.save(path)


    if __name__ == '__main__':
        keyTxt = 'Java'
        # One session serves every page instead of re-priming a new one per request.
        with _openSession() as session:
            data1 = main(_buildSearchUrl(keyTxt, 1), session)
            # Number of result pages for the keyword, needed to crawl the later pages.
            totalPage = int(data1["totalPage"])
            xls = createXls()
            sheet = createSheet(xls, keyTxt)
            try:
                rowNum = writeXls(sheet, data1["list"], data1["total"], rowNum)
                print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX:' , rowNum)
                for page in range(2, totalPage + 1):
                    urlNext = _buildSearchUrl(keyTxt, page)
                    print(urlNext)
                    data = main(urlNext, session)
                    rowNum = writeXls(sheet, data["list"], data["total"], rowNum)
            finally:
                # Keep the rows collected so far even when a later page fails.
                saveXls(xls)

    Processed: 0.015, SQL: 10