5.3 网络数据Mysql存储

    技术2022-07-10  140

    针对较大规模的数据,可使用数据库进行存储。数据库是基于数据结构来组织、存储和管理数据的仓库,每个数据库都有一个或多个不同的API用于创建、访问、管理、搜索和复制所存储的数据。目前,Oracle旗下的Mysql是最流行的关系型数据库管理系统,其具有体积小、性能高、开源免费等特点。 本小节以程序5-3来演示如何将采集到的药监局国产药品数据存储到Mysql数据库中(读者需具备Java、Spring、SpringMVC、Mybatis以及Mysql基础)。请读者自行搭建SSM框架,然后参照下述三层代码将药监局国产药品数据插入到数据表中。

    //程序5-3 //Controller层 @RequestMapping("/drug") public class DrugController { @Autowired DrugImpl drugImpl; @RequestMapping(value = "/drugAdd", method = RequestMethod.POST) @ResponseBody public void drugAdd(@RequestBody MedicineParams params) { if (params.getStart() != null && params.getEnd() != null) { drugImpl.drugAdd(params); } } } //Service层 public interface IDrug { void drugAdd(MedicineParams params); } @Service public class DrugImpl implements IDrug { @Autowired private DrugMapper drugMapper; @Override public void drugAdd(MedicineParams params) { WebDriver driver = WebDriverUtils.getWebDriver(); List<Map<String, String>> excelMapList = new ArrayList<>(); for (int id = params.getStart(); id <= params.getEnd(); id++) { driver.get(PathConstant.drug + id); String source = driver.getPageSource(); Document document = Jsoup.parse(source); Elements tbody = document.select("body > div > div > table:nth-child(1) > tbody"); Elements trs = tbody.get(0).getElementsByTag("tr"); //该id有数据 if (trs.size() > 4) { //药监局数据入mysql数据库,无需对字段进行处理 Map<String, String> mysqlMap = new HashMap<>(); MysqlUtils.saveInMysql(trs, id, 3, mysqlMap); drugMapper.insertSelective(mysqlMap); }else{ //保存空id } } driver.quit(); } } public class MysqlUtils { /** * 将药监局数据保存到mysql的通用方法 * * @param trs 含有需要保存数据的Elements对象 * @param id 药监局网址中的id * @param tailCount Elements对象中需要舍弃的子节点个数 */ public static void saveInMysql(Elements trs, int id, int tailCount,Map<String,String> map) { map.put("id",Integer.valueOf(id).toString()); for(int i = 1;i < trs.size() - tailCount;i++){ map.put("f" + i, Jsoup.parse(trs.get(i).child(1).text()).text()); } } } public class WebDriverUtils { public static WebDriver getWebDriver(){ System.setProperty("webdriver.chrome.driver","/Users/steven/Documents/chrome/chromedriver"); //声明使用的是谷歌浏览器 ChromeOptions chromeOptions=new ChromeOptions(); //实例化一个Chrome浏览器的实例 WebDriver driver = new ChromeDriver(); //设置打开的浏览器窗口最大化 driver.manage().window().maximize(); //设置隐性的等待时间 driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS); return driver; } } //Dao层 public interface DrugMapper { int insertSelective(Map<String,String> record); } <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> <mapper namespace="com.wind.medicineSpider.dao.mappers.DrugMapper"> <insert id="insertSelective" parameterType="HashMap"> insert into drug <trim prefix="(" suffix=")" suffixOverrides=","> <if test="id != null"> id, </if> <if test="f1 != null"> f1, </if> <if test="f2 != null"> f2, </if> <if test="f3 != null"> f3, </if> <if test="f4 != null"> f4, </if> <if test="f5 != null"> f5, </if> <if test="f6 != null"> f6, </if> <if test="f7 != null"> f7, </if> <if test="f8 != null"> f8, </if> <if test="f9 != null"> f9, </if> <if test="f10 != null"> f10, </if> <if test="f11 != null"> f11, </if> <if test="f12 != null"> f12, </if> <if test="f13 != null"> f13, </if> <if test="f14 != null"> f14, </if> </trim> <trim prefix="values (" suffix=")" suffixOverrides=","> <if test="id != null"> #{id,jdbcType=INTEGER}, </if> <if test="f1 != null"> #{f1,jdbcType=VARCHAR}, </if> <if test="f2 != null"> #{f2,jdbcType=VARCHAR}, </if> <if test="f3 != null"> #{f3,jdbcType=VARCHAR}, </if> <if test="f4 != null"> #{f4,jdbcType=VARCHAR}, </if> <if test="f5 != null"> #{f5,jdbcType=VARCHAR}, </if> <if test="f6 != null"> #{f6,jdbcType=VARCHAR}, </if> <if test="f7 != null"> #{f7,jdbcType=VARCHAR}, </if> <if test="f8 != null"> #{f8,jdbcType=VARCHAR}, </if> <if test="f9 != null"> #{f9,jdbcType=VARCHAR}, </if> <if test="f10 != null"> #{f10,jdbcType=VARCHAR}, </if> <if test="f11 != null"> #{f11,jdbcType=VARCHAR}, </if> <if test="f12 != null"> #{f12,jdbcType=VARCHAR}, </if> <if test="f13 != null"> #{f13,jdbcType=VARCHAR}, </if> <if test="f14 != null"> #{f14,jdbcType=VARCHAR}, </if> </trim> </insert> </mapper> //mysql脚本 DROP TABLE IF EXISTS `drug`; CREATE TABLE `drug` ( `id` int(6) NOT NULL, `f1` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f2` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f3` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f4` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f5` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f6` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f7` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f8` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f9` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f10` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f11` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f12` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f13` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, `f14` varchar(500) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL, PRIMARY KEY (`id`) USING BTREE ) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic; SET FOREIGN_KEY_CHECKS = 1;

    启动程序后,在postman中发起如下图所示的请求,即可采集数据。

    Processed: 0.010, SQL: 9