node.js实现爬虫项目展示大作业

    技术2022-07-13  82

    一、项目要求 二、实现 1、用户可注册登录网站,非注册用户不可登录查看数据

    登录页、注册页

    <!DOCTYPE html>
    <!-- Login / registration page. The AngularJS module "login" posts the
         credentials to /users/login and /users/register and shows the
         server's message in the alert box at the bottom. -->
    <html ng-app="login">
    <head>
        <meta charset="utf-8" />
        <title>Login</title>
        <!-- NOTE(review): the stylesheet is Bootstrap 3.3.0 while the script
             below is Bootstrap 4.1.0; the markup uses Bootstrap 3 class names
             (panel, col-xs-*). Confirm which version is intended. -->
        <link rel="stylesheet" href="https://cdn.bootcss.com/bootstrap/3.3.0/css/bootstrap.min.css">
        <script src="https://cdn.staticfile.org/jquery/3.2.1/jquery.min.js"></script>
        <script src="https://cdn.staticfile.org/popper.js/1.12.5/umd/popper.min.js"></script>
        <script src="https://cdn.staticfile.org/twitter-bootstrap/4.1.0/js/bootstrap.min.js"></script>
        <!-- AngularJS is served by the application itself under /angular -->
        <script src="/angular/angular.min.js"></script>
        <!-- Page-specific stylesheet and script -->
        <link rel="stylesheet" type="text/css" href="stylesheets/index.css">
        <script type="text/javascript" src="javascripts/index.js"></script>
        <script>
            var app = angular.module('login', []);

            app.controller('loginCtrl', function ($scope, $http, $timeout) {
                // Log in: the server compares the submitted credentials with
                // the stored ones and answers {msg: 'ok'} on success.
                $scope.check_pwd = function () {
                    var data = JSON.stringify({
                        username: $scope.username,
                        password: $scope.password
                    });
                    $http.post("/users/login", data).then(
                        function (res) {
                            if (res.data.msg === 'ok') {
                                window.location.href = '/news.html';
                            } else {
                                $scope.msg = res.data.msg;
                            }
                        },
                        function (err) {
                            $scope.msg = err.data;
                        }
                    );
                };

                // Register a new user.
                $scope.doAdd = function () {
                    // Both password fields must match before anything is sent.
                    if ($scope.add_password !== $scope.confirm_password) {
                        $scope.msg = '两次密码不一致!';
                        return;
                    }
                    var data = JSON.stringify({
                        username: $scope.add_username,
                        password: $scope.add_password
                    });
                    $http.post("/users/register", data).then(
                        function (res) {
                            $scope.msg = res.data.msg;
                            // On success, show the message for two seconds and
                            // then return to the login page.
                            if (res.data.msg === '成功注册!请登录') {
                                $timeout(function () {
                                    window.location.href = 'index.html';
                                }, 2000);
                            }
                        },
                        function (err) {
                            $scope.msg = err.data;
                        }
                    );
                };
            });
        </script>
    </head>
    <body>
    <div class="container" ng-controller="loginCtrl">
        <div class="row">
            <div class="col-md-6 col-md-offset-3">
                <div class="panel panel-login">
                    <div class="panel-heading">
                        <div class="row">
                            <div class="col-xs-6">
                                <a href="#" class="active" id="login-form-link">Login</a>
                            </div>
                            <div class="col-xs-6">
                                <a href="#" id="register-form-link">Register</a>
                            </div>
                        </div>
                        <hr>
                    </div>
                    <div class="panel-body">
                        <div class="row">
                            <div class="col-lg-12">
                                <!-- Login form -->
                                <form id="login-form" method="post" role="form" style="display: block;">
                                    <div class="form-group">
                                        <input ng-model="username" tabindex="1" class="form-control" placeholder="Username" value=""/>
                                    </div>
                                    <div class="form-group">
                                        <input type="password" ng-model="password" tabindex="2" class="form-control" placeholder="Password">
                                    </div>
                                    <div class="form-group">
                                        <div class="row">
                                            <div class="col-sm-6 col-sm-offset-3">
                                                <button id="login-submit" tabindex="4" class="form-control btn btn-login" ng-click="check_pwd()">LOG IN</button>
                                            </div>
                                        </div>
                                    </div>
                                </form>
                                <!-- Registration form (initially hidden) -->
                                <form id="register-form" method="post" role="form" style="display: none;">
                                    <div class="form-group">
                                        <input ng-model="add_username" tabindex="1" class="form-control" placeholder="Username" value=""/>
                                    </div>
                                    <div class="form-group">
                                        <input type="password" ng-model="add_password" tabindex="2" class="form-control" placeholder="Password">
                                    </div>
                                    <div class="form-group">
                                        <!-- tabindex 3 (was a duplicate 2) keeps the tab order unambiguous -->
                                        <input type="password" ng-model="confirm_password" tabindex="3" class="form-control" placeholder="Confirm Password">
                                    </div>
                                    <div class="form-group">
                                        <div class="row">
                                            <div class="col-sm-6 col-sm-offset-3">
                                                <button tabindex="4" class="form-control btn btn-register" ng-click="doAdd()">Register Now</button>
                                            </div>
                                        </div>
                                    </div>
                                </form>
                            </div>
                        </div>
                    </div>
                </div>
                <!-- Message returned by the server or by client-side validation -->
                <div class="alert alert-warning" ng-if="msg && msg!='ok'">
                    <a href="#" class="close" data-dismiss="alert">&times;</a>
                    <strong>警告!</strong>{{msg}}
                </div>
            </div>
        </div>
    </div>
    </body>
    </html>

    登录页、注册页路由

    /**
     * Routes mounted under /users: login, registration and logout.
     *
     * Every outcome of login/register is reported to the client as
     * `{msg: string}` with HTTP 200, because the login page decides what to
     * do by inspecting `res.data.msg`.
     */
    var express = require('express');
    var router = express.Router();
    var userDAO = require('../dao/userDAO');

    // Generic message used when the user lookup did not return a row list.
    var SERVER_ERROR_MSG = '服务器错误,请稍后再试';

    /** True when `value` is a non-empty string. */
    function isNonEmptyString(value) {
        return typeof value === 'string' && value.length > 0;
    }

    /**
     * POST /users/login
     * Body: {username, password}. Answers {msg: 'ok'} and stores the user
     * name in the session (and in a `username` cookie) when the password
     * matches; otherwise answers {msg: <reason>}.
     */
    router.post('/login', function (req, res) {
        var body = req.body || {};
        var username = body.username;
        var password = body.password;

        // Only plain strings may reach the DAO: objects/arrays taken from a
        // JSON body must never be handed to a query layer.
        if (!isNonEmptyString(username)) {
            res.json({msg: '用户不存在!请检查后输入'});
            return;
        }

        userDAO.getByUsername(username, function (user) {
            // NOTE(review): assumes the DAO yields an array of rows; anything
            // else is treated as a failed lookup instead of crashing on
            // `user.length`.
            if (!Array.isArray(user)) {
                res.json({msg: SERVER_ERROR_MSG});
                return;
            }
            if (user.length === 0) {
                res.json({msg: '用户不存在!请检查后输入'});
                return;
            }
            // TODO(review): this compares against the stored value as-is,
            // i.e. passwords appear to be kept in plain text. Store a salted
            // hash and compare hashes instead.
            if (isNonEmptyString(password) && password === user[0].password) {
                req.session['username'] = username;
                res.cookie('username', username);
                res.json({msg: 'ok'});
            } else {
                res.json({msg: '用户名或密码错误!请检查后输入'});
            }
        });
    });

    /**
     * POST /users/register
     * Body: {username, password}. Answers {msg: '成功注册!请登录'} once the
     * user has been added, or {msg: <reason>} when the name is taken or the
     * input is unusable.
     */
    router.post('/register', function (req, res) {
        var body = req.body || {};
        var username = body.username;
        var password = body.password;

        if (!isNonEmptyString(username) || !isNonEmptyString(password)) {
            res.json({msg: '用户名或密码错误!请检查后输入'});
            return;
        }

        // Check whether the user name is already taken before inserting.
        userDAO.getByUsername(username, function (user) {
            if (!Array.isArray(user)) {
                res.json({msg: SERVER_ERROR_MSG});
                return;
            }
            if (user.length !== 0) {
                res.json({msg: '用户已存在!'});
                return;
            }
            // Pass only the two expected fields instead of the whole request
            // body, so extra client-supplied properties never reach the DAO.
            // NOTE(review): the callback argument of userDAO.add is not
            // inspected because its meaning is not visible here — confirm
            // whether it signals failure.
            userDAO.add({username: username, password: password}, function (success) {
                res.json({msg: '成功注册!请登录'});
            });
        });
    });

    /**
     * GET /users/logout
     * Destroys the session and answers {result: '/index.html'}, the page the
     * client should navigate to. On failure answers HTTP 500 with {msg}, so
     * the client's success handler does not navigate to an undefined URL.
     */
    router.get('/logout', function (req, res, next) {
        req.session.destroy(function (err) {
            if (err) {
                res.status(500).json({msg: '退出登录失败'});
                return;
            }
            res.clearCookie('username');
            // Destroying the session does not remove the session cookie from
            // the browser; clear it explicitly ('connect.sid' is the
            // express-session default name) so a stale id is not sent again.
            res.clearCookie('connect.sid');
            res.json({result: '/index.html'});
        });
    });

    module.exports = router;

    session信息

    /**
     * Express application setup: session handling, request logging (console
     * plus the user-action table for logged-in users), static files, the
     * /users and /news routers, and the 404 / error handlers.
     */
    var createError = require('http-errors');
    var express = require('express');
    var path = require('path');
    var cookieParser = require('cookie-parser');
    var session = require('express-session');
    var logger = require('morgan');
    var logDAO = require('./dao/logDAO.js');

    var usersRouter = require('./routes/users');
    var newsRouter = require('./routes/news');

    var app = express();

    // Session configuration.
    app.use(session({
        // The secret can be overridden through the environment; the original
        // hard-coded value stays as the fallback.
        secret: process.env.SESSION_SECRET || 'sessiontest',
        resave: true,
        saveUninitialized: false, // do not store sessions that were never modified
        cookie: {
            maxAge: 1000 * 60 * 60, // session lifetime in milliseconds (one hour)
        },
    }));

    // Custom morgan format function. It prints the request details to the
    // console and, for logged-in users, records the action through logDAO.
    // It returns nothing, so morgan itself writes no log line.
    app.use(logger(function (tokens, req, res) {
        console.log('打印的日志信息:');
        var request_time = new Date();
        var request_method = tokens.method(req, res);
        var request_url = tokens.url(req, res);
        var status = tokens.status(req, res);
        var remote_addr = tokens['remote-addr'](req, res);
        var username = (req.session && req.session['username']) || 'notlogin';

        // Actions of logged-in users are written straight to MySQL.
        if (username !== 'notlogin') {
            logDAO.userlog(
                [username, request_time, request_method, request_url, status, remote_addr],
                function (success) {
                    console.log('成功保存!');
                }
            );
        }

        console.log('请求时间 = ', request_time);
        console.log('请求方式 = ', request_method);
        console.log('请求链接 = ', request_url);
        console.log('请求状态 = ', status);
        console.log('请求长度 = ', tokens.res(req, res, 'content-length'));
        console.log('响应时间 = ', tokens['response-time'](req, res) + 'ms');
        console.log('远程地址 = ', remote_addr);
        console.log('远程用户 = ', tokens['remote-user'](req, res));
        console.log('http版本 = ', tokens['http-version'](req, res));
        console.log('浏览器信息 = ', tokens['user-agent'](req, res));
        console.log('用户 = ', username);
        console.log(' ===============');
    }));

    app.use(express.json());
    app.use(express.urlencoded({ extended: false }));
    app.use(cookieParser());
    app.use(express.static(path.join(__dirname, 'public')));
    app.use('/angular', express.static(path.join(__dirname, '/node_modules/angular')));

    app.use('/users', usersRouter);
    app.use('/news', newsRouter);

    // NOTE: Express runs middleware in registration order. A login guard
    // registered at this point only sees requests that the static handlers
    // and routers above did not answer, which is why an earlier attempt to
    // add one here had no effect. To protect those routes, a guard must be
    // registered before them.

    // Catch 404 and forward to the error handler.
    app.use(function (req, res, next) {
        next(createError(404));
    });

    // Error handler. It must send a response: setting the status alone
    // leaves the request hanging until the client times out.
    app.use(function (err, req, res, next) {
        var isDevelopment = req.app.get('env') === 'development';
        var status = err.status || 500;

        // Set locals, only providing the error object in development.
        res.locals.message = err.message;
        res.locals.error = isDevelopment ? err : {};

        if (res.headersSent) {
            // Part of the response is already on the wire; let Express finish.
            next(err);
            return;
        }

        // Do not expose internal error details outside development.
        var message = (isDevelopment || status < 500) ? err.message : 'Internal Server Error';
        res.status(status).json({ message: message });
    });

    module.exports = app;

    2、用户注册、登录、查询等操作记入数据库中的日志

    建立两个MySQL表 user和user_action 记录用户和用户操作

    -- Registered users.
    -- NOTE(review): `password` is a plain VARCHAR(45); if passwords are to be
    -- stored hashed, the column must be widened to fit the hash.
    CREATE TABLE IF NOT EXISTS `crawl`.`user` (
        `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
        `username` VARCHAR(45) NOT NULL,
        `password` VARCHAR(45) NOT NULL,
        `registertime` datetime DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (`id`),
        UNIQUE KEY `username_UNIQUE` (`username`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

    -- Log of user actions: logins and queries, including the concrete
    -- request URL.
    CREATE TABLE IF NOT EXISTS `crawl`.`user_action` (
        `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
        `username` VARCHAR(45) NOT NULL,
        `request_time` VARCHAR(45) NOT NULL,
        `request_method` VARCHAR(20) NOT NULL,
        `request_url` VARCHAR(300) NOT NULL,
        `status` int(4),
        `remote_addr` VARCHAR(100) NOT NULL,
        PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

    3、实现查询词支持布尔表达式 (比如“新冠 AND 肺炎”或者“新冠 OR 肺炎”)

    查询页面

    <!-- Search form: two title keywords and two content keywords, each pair
         joined by a selectable AND / OR operator. -->
    <form class="form-horizontal" role="form">
        <div class="row" style="margin-bottom: 10px;">
            <label class="col-lg-2 control-label">标题关键字</label>
            <div class="col-lg-3">
                <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title1">
            </div>
            <div class="col-lg-1">
                <select class="form-control" autocomplete="off" ng-model="$parent.selectTitle">
                    <option selected="selected">AND</option>
                    <option>OR</option>
                </select>
            </div>
            <div class="col-lg-3">
                <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title2">
            </div>
        </div>
        <div class="row" style="margin-bottom: 10px;">
            <label class="col-lg-2 control-label">内容关键字</label>
            <div class="col-lg-3">
                <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content1">
            </div>
            <div class="col-lg-1">
                <select class="form-control" autocomplete="off" ng-model="$parent.selectContent">
                    <option selected="selected">AND</option>
                    <option>OR</option>
                </select>
            </div>
            <div class="col-lg-3">
                <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content2">
            </div>
        </div>
        <div class="form-group">
            <div class="col-md-offset-9">
                <button type="submit" class="btn btn-default" ng-click="search()">查询</button>
            </div>
        </div>
    </form>

    <!-- Search results, shown once a query has returned data -->
    <div ng-show="isisshowresult">
        <table class="table table-striped">
            <thead>
            <tr>
                <!-- Header cells are marked up as <th> -->
                <th>序号</th>
                <th>标题</th>
                <th>作者</th>
                <th>关键词</th>
                <th>链接</th>
                <th>发布时间</th>
            </tr>
            </thead>
            <tbody>
            <tr ng-repeat="(key, item) in items">
                <!-- `index` is the running number of the first row on the page -->
                <td>{{index+key}}</td>
                <td>{{item.title}}</td>
                <td>{{item.author}}</td>
                <td>{{item.keywords}}</td>
                <!-- The link column is rendered as a clickable link -->
                <td><a ng-href="{{item.url}}" target="_blank" rel="noopener noreferrer">{{item.url}}</a></td>
                <td>{{item.publish_date}}</td>
            </tr>
            </tbody>
        </table>
        <div class="row">
            <!-- These buttons are not inside a form, so they are plain buttons -->
            <div class="pull-left" style="margin-top: 12px;">
                <button type="button" class="btn btn-primary" ng-click="searchsortASC()">发布时间升序</button>
                <button type="button" class="btn btn-primary" ng-click="searchsortDESC()">发布时间降序</button>
            </div>
            <div class="pull-right">
                <nav>
                    <ul class="pagination">
                        <li>
                            <a ng-click="Previous()" role="button"><span role="button">上一页</span></a>
                        </li>
                        <li ng-repeat="page in pageList" ng-class="{active:isActivePage(page)}" role="button">
                            <a ng-click="selectPage(page)">{{ page }}</a>
                        </li>
                        <li>
                            <a ng-click="Next()" role="button"><span role="button">下一页</span></a>
                        </li>
                    </ul>
                </nav>
            </div>
        </div>
    </div>

    news.js

    var app = angular.module('news', []);

    /**
     * Controller of the news page: keyword search with client-side paging
     * and sorting, logout, and four ECharts visualisations (histogram, pie,
     * line, word cloud) that are all drawn into the element with id "main1".
     *
     * Server protocol used by every /news request: a response whose
     * `message` is 'url' carries a page to navigate to in `result`;
     * otherwise `result` carries the data.
     */
    app.controller('news_Ctrl', function ($scope, $http, $timeout) {
        var CHART_CONTAINER_ID = 'main1';
        // Id of the timer that cycles the pie-chart highlight, or null.
        var pieHighlightTimer = null;

        /** True for undefined, null and strings that are empty or whitespace. */
        function isBlank(value) {
            return value === undefined || value === null || String(value).trim() === '';
        }

        /** Stops the pie-chart highlight animation if it is running. */
        function stopPieHighlight() {
            if (pieHighlightTimer !== null) {
                clearInterval(pieHighlightTimer);
                pieHighlightTimer = null;
            }
        }

        /**
         * Returns a cleared chart bound to the shared container. The existing
         * instance is reused so the same element is not initialised twice,
         * and clearing it stops a previous chart from leaking into the next.
         */
        function prepareChart() {
            stopPieHighlight();
            var container = document.getElementById(CHART_CONTAINER_ID);
            var chart = echarts.getInstanceByDom(container) || echarts.init(container);
            chart.clear();
            return chart;
        }

        /** Follows a server-requested redirect; returns true if one was requested. */
        function redirectIfRequested(res) {
            if (res.data.message == 'url') {
                window.location.href = res.data.result;
                return true;
            }
            return false;
        }

        /** Shared failure handler for all requests. */
        function reportError(err) {
            $scope.msg = err.data;
        }

        // Show the search form with every field reset.
        $scope.showSearch = function () {
            stopPieHighlight();
            $scope.isShow = true;
            $scope.isisshowresult = false;
            $scope.title1 = undefined;
            $scope.title2 = undefined;
            $scope.selectTitle = 'AND';
            $scope.content1 = undefined;
            $scope.content2 = undefined;
            $scope.selectContent = 'AND';
            $scope.sorttime = undefined;
        };

        $scope.logout = function () {
            $http.get("/users/logout").then(
                function (res) {
                    window.location.href = res.data.result;
                },
                reportError
            );
        };

        // Run the search described by the form fields.
        $scope.search = function () {
            // Blank input counts as "not given".
            var title1 = isBlank($scope.title1) ? undefined : $scope.title1;
            var title2 = isBlank($scope.title2) ? undefined : $scope.title2;
            var content1 = isBlank($scope.content1) ? undefined : $scope.content1;
            var content2 = isBlank($scope.content2) ? undefined : $scope.content2;
            // The operator falls back to AND when the selector was never touched.
            var selectTitle = $scope.selectTitle || 'AND';
            var selectContent = $scope.selectContent || 'AND';
            var sorttime = $scope.sorttime;

            // The user may have typed only into the second box of a pair.
            if (title1 === undefined && title2 !== undefined) {
                title1 = title2;
            }
            if (content1 === undefined && content2 !== undefined) {
                content1 = content2;
            }

            // With no keyword at all the server returns every record.
            // Missing values are sent as the literal text "undefined", which
            // is what the server side checks for. Every value is URL-encoded
            // so characters such as & or # cannot break the query string.
            var params = [
                ['t1', title1], ['ts', selectTitle], ['t2', title2],
                ['c1', content1], ['cs', selectContent], ['c2', content2],
                ['stime', sorttime]
            ];
            var myurl = '/news/search?' + params.map(function (pair) {
                return pair[0] + '=' + encodeURIComponent(String(pair[1]));
            }).join('&');

            $http.get(myurl).then(
                function (res) {
                    if (res.data.message == 'data') {
                        $scope.isisshowresult = true; // show the result table
                        $scope.initPageSort(res.data.result);
                    } else {
                        window.location.href = res.data.result;
                    }
                },
                reportError
            );
        };

        // Paging: set up page 1 for a fresh result list.
        $scope.initPageSort = function (item) {
            $scope.pageSize = 5; // rows per page
            $scope.selPage = 1;
            $scope.data = item || [];
            $scope.pages = Math.ceil($scope.data.length / $scope.pageSize);
            $scope.index = 1; // running number of the first row on the page
            // At most five page buttons are shown at a time.
            var len = $scope.pages > 5 ? 5 : $scope.pages;
            $scope.pageList = Array.from({length: len}, (x, i) => i + 1);
            $scope.items = $scope.data.slice(0, $scope.pageSize);
        };

        // Jump to the given page (ignored when it is out of range).
        $scope.selectPage = function (page) {
            if (page < 1 || page > $scope.pages) return;
            // Show up to five page buttons, centred on the current page once
            // it is past page 2.
            var first = page > 2 ? page - 2 : page;
            var pageList = [];
            for (var i = first; i <= $scope.pages && i < first + 5; i++) {
                pageList.push(i);
            }
            $scope.index = (page - 1) * $scope.pageSize + 1;
            $scope.pageList = pageList;
            $scope.selPage = page;
            $scope.items = $scope.data.slice($scope.pageSize * (page - 1), page * $scope.pageSize);
            console.log("选择的页:" + page);
        };

        // Used to mark the current page button as active.
        $scope.isActivePage = function (page) {
            return $scope.selPage == page;
        };

        $scope.Previous = function () {
            $scope.selectPage($scope.selPage - 1);
        };

        $scope.Next = function () {
            $scope.selectPage($scope.selPage + 1);
        };

        // Re-run the search sorted by publish time ('1' ascending, '2' descending).
        $scope.searchsortASC = function () {
            $scope.sorttime = '1';
            $scope.search();
        };

        $scope.searchsortDESC = function () {
            $scope.sorttime = '2';
            $scope.search();
        };

        // ---- Charts ----

        // Bar chart: number of published news items per day.
        $scope.histogram = function () {
            $scope.isShow = false;
            $http.get("/news/histogram").then(
                function (res) {
                    if (redirectIfRequested(res)) return;
                    var xdata = [];
                    var ydata = [];
                    var pattern = /\d{4}-(\d{2}-\d{2})/;
                    res.data.result.forEach(function (element) {
                        // x looks like "2020-04-28T16:00:00.000Z"; keep only
                        // month-day, falling back to the raw value when it
                        // does not match.
                        var match = pattern.exec(element["x"]);
                        xdata.push(match ? match[1] : String(element["x"]));
                        ydata.push(element["y"]);
                    });
                    var option = {
                        title: { text: '新闻发布数 随时间变化' },
                        tooltip: {},
                        legend: { data: ['新闻发布数'] },
                        xAxis: { data: xdata },
                        yAxis: {},
                        // The series name must equal the legend entry,
                        // otherwise the legend stays empty.
                        series: [{ name: '新闻发布数', type: 'bar', data: ydata }]
                    };
                    prepareChart().setOption(option);
                },
                reportError
            );
        };

        // Pie chart: number of news items per author, with a highlight that
        // moves to the next slice every second.
        $scope.pie = function () {
            $scope.isShow = false;
            $http.get("/news/pie").then(
                function (res) {
                    if (redirectIfRequested(res)) return;
                    var pattern = /责任编辑:(.+)/; // extracts the editor's name
                    var slices = res.data.result.map(function (element) {
                        // x looks like "责任编辑:李夏君"; keep only the name.
                        var match = pattern.exec(element["x"]);
                        var rawName = element["x"] == null ? '未知' : String(element["x"]);
                        return { name: match ? match[1] : rawName, value: element["y"] };
                    });
                    var option = {
                        title: { text: '作者发布新闻数量', x: 'center' },
                        tooltip: { trigger: 'item', formatter: "{a} <br/>{b} : {c} ({d}%)" },
                        legend: { orient: 'vertical', left: 'left' },
                        series: [{
                            // Shown as {a} in the tooltip.
                            name: '发布新闻数',
                            type: 'pie',
                            radius: '55%',
                            center: ['50%', '60%'],
                            data: slices,
                            itemStyle: {
                                emphasis: {
                                    shadowBlur: 10,
                                    shadowOffsetX: 0,
                                    shadowColor: 'rgba(0, 0, 0, 0.5)'
                                }
                            }
                        }]
                    };
                    var chart = prepareChart();
                    chart.setOption(option, true);

                    var sliceCount = slices.length;
                    if (sliceCount === 0) return; // nothing to highlight

                    var currentIndex = -1;
                    // The timer id is remembered so the animation stops when
                    // another chart is drawn instead of piling up timers.
                    pieHighlightTimer = setInterval(function () {
                        // Un-highlight the previous slice ...
                        chart.dispatchAction({ type: 'downplay', seriesIndex: 0, dataIndex: currentIndex });
                        currentIndex = (currentIndex + 1) % sliceCount;
                        // ... highlight the next one and show its tooltip.
                        chart.dispatchAction({ type: 'highlight', seriesIndex: 0, dataIndex: currentIndex });
                        chart.dispatchAction({ type: 'showTip', seriesIndex: 0, dataIndex: currentIndex });
                    }, 1000);
                },
                reportError
            );
        };

        // Line chart: occurrences of the word "疫情" in the news over time.
        $scope.line = function () {
            $scope.isShow = false;
            $http.get("/news/line").then(
                function (res) {
                    if (redirectIfRequested(res)) return;
                    var option = {
                        title: { text: '"疫情"该词在新闻中的出现次数随时间变化图' },
                        xAxis: { type: 'category', data: Object.keys(res.data.result) },
                        yAxis: { type: 'value' },
                        series: [{
                            data: Object.values(res.data.result),
                            type: 'line',
                            itemStyle: { normal: { label: { show: true } } }
                        }]
                    };
                    prepareChart().setOption(option, true);
                },
                reportError
            );
        };

        // Word cloud of the segmented news content.
        $scope.wordcloud = function () {
            $scope.isShow = false;
            $http.get("/news/wordcloud").then(
                function (res) {
                    if (redirectIfRequested(res)) return;
                    var chart = prepareChart();
                    var data = Object.keys(res.data.result).map(function (name) {
                        // The square root dampens the differences between
                        // frequent and rare words.
                        return { name: name, value: Math.sqrt(res.data.result[name]) };
                    });
                    var maskImage = new Image();
                    var option = {
                        title: { text: '所有新闻内容 jieba分词 的词云展示' },
                        series: [{
                            type: 'wordCloud',
                            sizeRange: [12, 60],
                            rotationRange: [-90, 90],
                            rotationStep: 45,
                            gridSize: 2,
                            shape: 'circle',
                            maskImage: maskImage,
                            drawOutOfBound: false,
                            textStyle: {
                                normal: {
                                    fontFamily: 'sans-serif',
                                    fontWeight: 'bold',
                                    // Random dark colour for each word.
                                    color: function () {
                                        return 'rgb(' + [
                                            Math.round(Math.random() * 160),
                                            Math.round(Math.random() * 160),
                                            Math.round(Math.random() * 160)
                                        ].join(',') + ')';
                                    }
                                },
                                emphasis: { shadowBlur: 10, shadowColor: '#333' }
                            },
                            data: data
                        }]
                    };
                    maskImage.onload = function () {
                        chart.clear();
                        chart.setOption(option);
                    };
                    // Without the mask image the cloud is still drawn, using
                    // the plain `shape` instead.
                    maskImage.onerror = function () {
                        option.series[0].maskImage = undefined;
                        chart.clear();
                        chart.setOption(option);
                    };
                    // Handlers are attached before the source is set so a
                    // cached image cannot finish loading unnoticed.
                    maskImage.src = './images/logo.png';
                    window.onresize = function () {
                        chart.resize();
                    };
                },
                reportError
            );
        };
    });

    DAO.js

    var mysql = require('mysql'); var mysqlConf = require('../conf/mysqlConf'); var pool = mysql.createPool(mysqlConf.mysql); // 使用了连接池,重复使用数据库连接,而不必每执行一次CRUD操作就获取、释放一次数据库连接,从而提高了对数据库操作的性能。 module.exports = { query_noparam :function(sql, callback) { pool.getConnection(function(err, conn) { if (err) { callback(err, null, null); } else { conn.query(sql, function(qerr, vals, fields) { conn.release(); //释放连接 callback(qerr, vals, fields); //事件驱动回调 }); } }); }, search :function(searchparam, callback) { // 组合查询条件 var sql = 'select * from fetches '; if(searchparam["t2"]!="undefined"){ sql +=(`where title like '%${searchparam["t1"]}%' ${searchparam['ts']} title like '%${searchparam["t2"]}%' `); }else if(searchparam["t1"]!="undefined"){ sql +=(`where title like '%${searchparam["t1"]}%' `); }; if(searchparam["t1"]=="undefined"&&searchparam["t2"]=="undefined"&&searchparam["c1"]!="undefined"){ sql+='where '; }else if(searchparam["t1"]!="undefined"&&searchparam["c1"]!="undefined"){ sql+='and '; } if(searchparam["c2"]!="undefined"){ sql +=(`content like '%${searchparam["c1"]}%' ${searchparam['cs']} content like '%${searchparam["c2"]}%' `); }else if(searchparam["c1"]!="undefined"){ sql +=(`content like '%${searchparam["c1"]}%' `); } if(searchparam['stime']!="undefined"){ if(searchparam['stime']=="1"){ sql+='ORDER BY publish_date ASC '; }else { sql+='ORDER BY publish_date DESC '; } } sql+=';'; pool.getConnection(function(err, conn) { if (err) { callback(err, null, null); } else { conn.query(sql, function(qerr, vals, fields) { conn.release(); //释放连接 callback(qerr, vals, fields); //事件驱动回调 }); } }); }, };

    三、总结 这学期首次接触 Web 编程,大部分代码都借鉴自老师的代码。希望以后能不断学习、不断进步。

    Processed: 0.031, SQL: 9