实验要求 实验过程 1.在mysql中新建两个表,用来记录用户信息和操作日志。
记录用户信息的表user:
CREATE TABLE `crawl`.`user` ( `id` INT UNSIGNED NOT NULL AUTO_INCREMENT, `username` VARCHAR(45) NOT NULL, `password` VARCHAR(45) NOT NULL, `registertime` datetime DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`), UNIQUE KEY `username_UNIQUE` (`username`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;记录操作日志的表user_action:
CREATE TABLE `crawl`.`user_action` ( `id` INT UNSIGNED NOT NULL AUTO_INCREMENT, `username` VARCHAR(45) NOT NULL, `request_time` VARCHAR(45) NOT NULL, `request_method` VARCHAR(20) NOT NULL, `request_url` VARCHAR(300) NOT NULL, `status` int(4), `remote_addr` VARCHAR(100) NOT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;建立连接mysql配置文件 mysqlConf.js
module.exports = { mysql: { host: 'localhost', user: 'root', password: 'root' database: 'crawl' connectionLimit: 10 //最大连接数,默认为10 } };2.实现用户注册和登录 引入前端框架angular
<script src="/angular/angular.min.js"></script>引入自己的样式与js
<link rel="stylesheet" type="text/css" href="stylesheets/index.css"> <script type="text/javascript" src="javascripts/index.js"></script> <script> var app = angular.module('login', []); app.controller('loginCtrl', function ($scope, $http, $timeout) { // 登录时,检查用户输入的账户密码是否与数据库中的一致 $scope.check_pwd = function () { var data = JSON.stringify({ username: $scope.username, password: $scope.password }); $http.post("/users/login", data) .then( function (res) { if(res.data.msg=='ok') { window.location.href='/news.html'; } else{ $scope.msg=res.data.msg; } }, function (err) { $scope.msg = err.data; }; }; //增加注册用户 $scope.doAdd = function () { // 检查用户注册时,输入的两次密码是否一致 if($scope.add_password!==$scope.confirm_password){ $scope.msg = '两次密码不一致!'; } else { var data = JSON.stringify({ username: $scope.add_username, password: $scope.add_password }); $http.post("/users/register", data) .then(function (res) { if(res.data.msg=='成功注册!请登录') { $scope.msg=res.data.msg; $timeout(function () { window.location.href='index.html'; },2000); } else { $scope.msg = res.data.msg; } }, function (err) { $scope.msg = err.data; }); } }; });引入angular.js后,登录时检测user数据表中是否存在该用户名与密码,如果登录成功会跳转到news.html页面,用户则可以看到主界面。
设置登录页的路由 调用userDAO,因为session信息只保存在启动网站内存中,所以要注意保存session信息,记录用户的操作日志
router.post('/login', function(req, res) { var username = req.body.username; var password = req.body.password; userDAO.getByUsername(username, function (user) { if(user.length==0){ res.json({msg:'用户不存在!请检查后输入'}); }else { if(password===user[0].password){ req.session['username'] = username; res.cookie('username', username); res.json({msg: 'ok'}); // res.json({msg:'ok'}); }else{res.json({msg:'用户名或密码错误!请检查后输入'}); } } }); });调用的userDAO代码:
var mysql = require('mysql'); var mysqlConf = require('../conf/mysqlConf'); var userSqlMap = require('./userSqlMap'); var pool = mysql.createPool(mysqlConf.mysql); module.exports = { add: function (user, callback) { pool.query(userSqlMap.add, [user.username, user.password], function (error, result) { if (error) throw error; callback(result.affectedRows > 0); }); }, getByUsername: function (username, callback) { pool.query(userSqlMap.getByUsername, [username], function (error, result) { if (error) throw error; callback(result); }); }, };最终结果如下:
3.实现查询词支持布尔表达式
在news.html引入查询页面
<div ng-include="'search.html'"></div>search.html代码: 设置查询界面按钮
<form class="form-horizontal" role="form"> <div class="row" style="margin-bottom: 10px;"> <label class="col-lg-2 control-label">标题关键字</label> <div class="col-lg-3"> <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title1"> </div> <div class="col-lg-1"> <select class="form-control" autocomplete="off" ng-model="$parent.selectTitle"> <option selected="selected">AND</option> <option>OR</option> </select> </div> <div class="col-lg-3"> <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title2"> </div> </div> <div class="row" style="margin-bottom: 10px;"> <label class="col-lg-2 control-label">内容关键字</label> <div class="col-lg-3"> <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content1"> </div> <div class="col-lg-1"> <select class="form-control" autocomplete="off" ng-model="$parent.selectContent"> <option selected="selected">AND</option> <option>OR</option> </select> </div> <div class="col-lg-3"> <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content2"> </div> </div> <div class="form-group"> <div class="col-md-offset-9"> <button type="submit" class="btn btn-default" ng-click="search()">查询</button> </div> </div> </form>显示查询内容结果:
<div ng-show="isisshowresult"> <table class="table table-striped"> <thead> <tr> <td>序号</td> <td>标题</td> <td>作者</td> <td>关键词</td> <td>链接</td> <td>发布时间</td> </tr> <td>关键词</td> <td>链接</td> <td>发布时间</td> </tr> <td>{{item.keywords}}</td> <td>{{item.url}}</td> <td>{{item.publish_date}}</td> </tr> </tbody> </table> <div class="row"> <div class="pull-left" style="margin-top: 12px;"> <button type="submit" class="btn btn-primary" ng-click="searchsortASC()" >发布时间升序</button> <button type="submit" class="btn btn-primary" ng-click="searchsortDESC()">发布时间降序</button> </div> <div class="pull-right"> <nav> <ul class="pagination"> <li> <a ng-click="Previous()" role="button"><span role="button">上一页</span></a> </li> <li ng-repeat="page in pageList" ng-class="{active:isActivePage(page)}" role="button"> <a ng-click="selectPage(page)" >{{ page }}</a> </li> <li> <a ng-click="Next()" role="button"><span role="button">下一页</span></a> </li> </ul> </nav> </div> </div> </div>后端news.js代码: 将用户输入的数据用get方式传给后端处理。对于用户的输入,若一个词都不输入,则输出全部数据。
$scope.search = function () { var title1 = $scope.title1; var title2 = $scope.title2; var selectTitle = $scope.selectTitle; var content1 = $scope.content1; var content2 = $scope.content2; var selectContent = $scope.selectContent; var sorttime = $scope.sorttime; if(typeof title1=="undefined" && typeof title2!="undefined" && title2.length>0){ title1 = title2; } if(typeof content1=="undefined" && typeof content2!="undefined" && content2.length>0){ content1 = content2; } var myurl = `/news/search?t1=${title1}&ts=${selectTitle}&t2=${title2}&c1=${content1}&cs=${selectContent}&c2=${content2} $stime=${sorttime}`; $http.get(myurl).then( function (res) { if(res.data.message=='data'){ $scope.isisshowresult = true; // $scope.searchdata = res.data; $scope.initPageSort(res.data.result) }else { window.location.href=res.data.result; } },function (err) { $scope.msg = err.data; }); };设置查询页路由:
router.get('/search', function(request, response) { console.log(request.session['username']); if (request.session['username']===undefined) { response.json({message:'url',result:'/index.html'}); }else { var param = request.query; newsDAO.search(param,function (err, result, fields) { response.json({message:'data',result:result}); }) } });newsDAO.search用来拼起sql
search :function(searchparam, callback) { // 组合查询条件 var sql = 'select * from fetches '; if(searchparam["t2"]!="undefined"){ sql +=(`where title like '%${searchparam["t1"]}%' ${searchparam['ts']} title like '%${searchparam["t2"]}%' `); }else if(searchparam["t1"]!="undefined"){ sql +=(`where title like '%${searchparam["t1"]}%' `); }; if(searchparam["t1"]=="undefined"&&searchparam["t2"]=="undefined"&&searchparam["c1"]!="undefined"){ sql+='where '; }else if(searchparam["t1"]!="undefined"&&searchparam["c1"]!="undefined"){ sql+='and '; } if(searchparam["c2"]!="undefined"){ sql +=(`content like '%${searchparam["c1"]}%' ${searchparam['cs']} content like '%${searchparam["c2"]}%' `); }else if(searchparam["c1"]!="undefined"){ sql +=(`content like '%${searchparam["c1"]}%' `); } if(searchparam['stime']!="undefined"){ if(searchparam['stime']=="1"){ sql+='ORDER BY publish_date ASC '; }else { sql+='ORDER BY publish_date DESC '; } } sql+=';'; pool.getConnection(function(err, conn) { if (err) { callback(err, null, null); } else { conn.query(sql, function(qerr, vals, fields) { conn.release(); //释放连接 callback(qerr, vals, fields); //事件驱动回调 }); } }); }, };分页显示查询结果:
$scope.initPageSort=function(item){ $scope.pageSize=5; //每页显示的数据量,可以随意更改 $scope.selPage = 1; $scope.data = item; $scope.pages = Math.ceil($scope.data.length / $scope.pageSize); $scope.pageList = []; $scope.index = 1; var len = $scope.pages> 5 ? 5:$scope.pages; $scope.pageList = Array.from({length: len}, (x,i) => i+1); $scope.items = $scope.data.slice(0, $scope.pageSize); }; $scope.selectPage = function (page) { if (page < 1 || page > $scope.pages) return; var pageList = []; if(page>2){ for (var i = page-2; i <= $scope.pages && i < page+3; i++) { pageList.push(i); } }else { for (var i = page; i <= $scope.pages && i < page+5; i++) { pageList.push(i); } } $scope.index =(page-1)*$scope.pageSize+1; $scope.pageList = pageList; $scope.selPage = page; $scope.items = $scope.data.slice(($scope.pageSize * (page - 1)), (page * $scope.pageSize)); console.log("选择的页:" + page); };最终结果:
4.用Echarts实现了3个数据分析图表 (因为没有持续爬取数据,所以最后的图表不准确) 路由代码:
router.get('/histogram', function(request, response) { console.log(request.session['username']); if (request.session['username']===undefined) { // response.redirect('/index.html') response.json({message:'url',result:'/index.html'}); }else { var fetchSql = "select publish_date as x,count(publish_date) as y from fetches group by publish_date order by publish_date;"; newsDAO.query_noparam(fetchSql, function (err, result, fields) { response.writeHead(200, { "Content-Type": "application/json", "Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache", "Expires": 0 }); response.write(JSON.stringify({message:'data',result:result})); response.end(); }); } }); router.get('/line', function(request, response) { console.log(request.session['username']); if (request.session['username']===undefined) { // response.redirect('/index.html') response.json({message:'url',result:'/index.html'}); }else { var keyword = '疫情'; var fetchSql = "select content,publish_date from fetches where content like'%" + keyword + "%' order by publish_date;"; newsDAO.query_noparam(fetchSql, function (err, result, fields) { response.writeHead(200, { "Content-Type": "application/json", "Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache", "Expires": 0 }); response.write(JSON.stringify({message:'data',result:myfreqchangeModule.freqchange(result, keyword)})); response.end(); }); } }); router.get('/wordcloud', function(request, response) { console.log(request.session['username']); if (request.session['username']===undefined) { // response.redirect('/index.html') response.json({message:'url',result:'/index.html'}); }else { var fetchSql = "select content from fetches;"; newsDAO.query_noparam(fetchSql, function (err, result, fields) { response.writeHead(200, { "Content-Type": "application/json", "Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache", "Expires": 0 }); response.write(JSON.stringify({message:'data',result:mywordcutModule.wordcut(result)}));//返回处理过的数据 response.end(); }); } });柱状图:
$scope.histogram = function () { $scope.isShow = false; $http.get("/news/histogram") .then( function (res) { if(res.data.message=='url'){ window.location.href=res.data.result; }else { let xdata = [], ydata = [], newdata; var pattern = /\d{4}-(\d{2}-\d{2})/; res.data.result.forEach(function (element) { // "x":"2020-04-28T16:00:00.000Z" ,对x进行处理,只取 月日 xdata.push(pattern.exec(element["x"])[1]); ydata.push(element["y"]); }); newdata = {"xdata": xdata, "ydata": ydata}; ar myChart = echarts.init(document.getElementById('main1')); var option = { title: { text: '新闻发布数 随时间变化' }, tooltip: {}, legend: { data: ['新闻发布数'] }, xAxis: { data: newdata["xdata"] }, yAxis: {}, series: [{ name: '新闻数目', type: 'bar', data: newdata["ydata"] }] }; // 使用刚指定的配置项和数据显示图表。 myChart.setOption(option); } }, function (err) { $scope.msg = err.data; }); };折线图:
$scope.line = function () { $scope.isShow = false; $http.get("/news/line").then( function (res) { if(res.data.message=='url'){ window.location.href=res.data.result; }else { var myChart = echarts.init(document.getElementById("main1")); option = { title: { text: '"疫情"该词在新闻中的出现次数随时间变化图' }, xAxis: { type: 'category', data: Object.keys(res.data.result) }, yAxis: { type: 'value' }, series: [{ data: Object.values(res.data.result), type: 'line', itemStyle: {normal: {label: {show: true}}} }], }; if (option && typeof option === "object") { myChart.setOption(option, true); } } }); };查看不同的关键词随时间出现次数需要需要在路由代码中修改keyword的关键词
词云:
$scope.wordcloud = function () { $scope.isShow = false; $http.get("/news/wordcloud").then( function (res) { if(res.data.message=='url'){ window.location.href=res.data.result; }else { var mainContainer = document.getElementById('main1'); var chart = echarts.init(mainContainer); var data = []; for (var name in res.data.result) { data.push({ name: name, value: Math.sqrt(res.data.result[name]) }) } var maskImage = new Image(); maskImage.src = './images/logo.png'; var option = { title: { text: '所有新闻内容 jieba分词 的词云展示' }, series: [{ type: 'wordCloud', sizeRange: [12, 60], rotationRange: [-90, 90], rotationStep: 45, gridSize: 2, shape: 'circle', maskImage: maskImage, drawOutOfBound: false, textStyle: { normal: { fontFamily: 'sans-serif', fontWeight: 'bold', // Color can be a callback function or a color string color: function () { // Random color return 'rgb(' + [ Math.round(Math.random() * 160), Math.round(Math.random() * 160), Math.round(Math.random() * 160) ].join(',') + ')'; } }, emphasis: { shadowBlur: 10, shadowColor: '#333' } }, data: data }] }; maskImage.onload = function () { // option.series[0].data = data; chart.clear(); chart.setOption(option); }; window.onresize = function () { chart.resize(); }; } }); }修改 maskImage.src = './images/logo.png’可以以其他图片样式显示词云
5.将用户操作记录数据库 直接引入var logger = require('morgan’);保存信息
app.use(logger(function (tokens, req, res) { console.log('打印的日志信息:'); var request_time = new Date(); var request_method = tokens.method(req, res); var request_url = tokens.url(req, res); var status = tokens.status(req, res); var remote_addr = tokens['remote-addr'](req, res); if(req.session){ var username = req.session['username']||'notlogin'; }else { var username = 'notlogin'; } if(username!='notlogin'){ logDAO.userlog([username,request_time,request_method,request_url,status,remote_addr], function (success) { console.log('成功保存!'); }) }
实验总结 这次试验原以为有了上次的积累会容易一些,没想到真正自己操作时还是无从下手,只写了一些断断续续的代码,最终还是在老师的代码示例帮助下完成了实验,这次最重要的经验在于实验前要把整体思路想好,不同功能的代码有序放在不同的子文件夹。自己的代码能力也还存在很大不足,其中Echarts实现饼状图时因为爬取的网站中作者名称的表现形式不同,想要做一个关键词的饼状图因为两边参数修改的问题也失败了。