ElasticSearch仿京东搜索实战SpringBoot项目

    技术2022-07-13  62

    文章目录

    配置信息 · ElasticSearch配置 · service层实现业务编写——ES存储数据、获取ES数据 · controller层负责调用业务层(service) · 前后端交互 · 代码

    配置信息

    server.port=9090
    # thymeleaf
    spring.thymeleaf.cache=false

    ElasticSearch配置

    /src/main/java/com/kuang/kuangshenesjd/config/ElasticSearchClientConfig.java ```java package com.kuang.kuangshenesjd.config; import org.apache.http.HttpHost; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestHighLevelClient; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; // 狂神的Spring两步骤 // 1.找对象 // 2.放到spring中 // 3.如果是springboot,先分析源码 // xxxx AutoConfiguration xxxProperfile @Configuration public class ElasticSearchClientConfig { @Bean public RestHighLevelClient restHighLevelClient() { RestHighLevelClient client = new RestHighLevelClient( RestClient.builder( new HttpHost("127.0.0.1", 9200, "http")) ); return client; } } 项目结构

    ├── README.md
    ├── kuangshen-es-jd.iml
    ├── mvnw
    ├── mvnw.cmd
    ├── pom.xml
    ├── src
    │   ├── main
    │   │   ├── java
    │   │   │   └── com
    │   │   │       └── kuang
    │   │   │           └── kuangshenesjd
    │   │   │               ├── KuangshenEsJdApplication.java
    │   │   │               ├── config
    │   │   │               │   └── ElasticSearchClientConfig.java
    │   │   │               ├── controller
    │   │   │               │   ├── ContentController.java
    │   │   │               │   └── IndexController.java
    │   │   │               ├── pojo
    │   │   │               │   └── Content.java
    │   │   │               ├── service
    │   │   │               │   └── ContentService.java
    │   │   │               └── utils
    │   │   │                   └── HtmlParseUtil.java
    │   │   └── resources
    │   │       ├── application.properties
    │   │       ├── static
    │   │       │   ├── css
    │   │       │   │   └── style.css
    │   │       │   ├── images
    │   │       │   │   └── jdlogo.png
    │   │       │   └── js
    │   │       │       └── jquery.min.js
    │   │       └── templates
    │   │           └── index.html
    │   └── test
    │       └── java
    │           └── com
    │               └── kuang
    │                   └── kuangshenesjd
    │                       └── KuangshenEsJdApplicationTests.java
    └── target
        ├── classes
        │   ├── application.properties
        │   ├── com
        │   │   └── kuang
        │   │       └── kuangshenesjd
        │   │           ├── KuangshenEsJdApplication.class
        │   │           ├── config
        │   │           │   └── ElasticSearchClientConfig.class
        │   │           ├── controller
        │   │           │   ├── ContentController.class
        │   │           │   └── IndexController.class
        │   │           ├── pojo
        │   │           │   └── Content.class
        │   │           ├── service
        │   │           │   └── ContentService.class
        │   │           └── utils
        │   │               └── HtmlParseUtil.class
        │   ├── static
        │   │   ├── css
        │   │   │   └── style.css
        │   │   ├── images
        │   │   │   └── jdlogo.png
        │   │   └── js
        │   │       └── jquery.min.js
        │   └── templates
        │       └── index.html
        ├── generated-sources
        │   └── annotations
        ├── generated-test-sources
        │   └── test-annotations
        └── test-classes
            └── com
                └── kuang
                    └── kuangshenesjd
                        └── KuangshenEsJdApplicationTests.class

    45 directories, 31 files

    // Crawling the data
    //
    // Where does the data come from? A database, a message queue -- any of these counts as a
    // data source; a crawler works as well. Only a small amount of test data is needed here,
    // so the project first crawls and parses it.
    //
    // Crawler:
    package com.kuang.kuangshenesjd.utils;

    import com.kuang.kuangshenesjd.pojo.Content;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    import org.springframework.stereotype.Component;

    import java.io.IOException;
    import java.net.URL;
    import java.net.URLEncoder;
    import java.nio.charset.StandardCharsets;
    import java.util.ArrayList;

    /**
     * Fetches a JD search-result page and turns each listed product into a {@link Content}.
     */
    @Component
    public class HtmlParseUtil {

        /** Manual smoke test: crawl the results for "java" and print every parsed item. */
        public static void main(String[] args) throws IOException {
            new HtmlParseUtil().parseJD("java").forEach(System.out::println);
        }

        /**
         * Downloads the JD search page for {@code keywords} and parses the product list.
         *
         * <p>Requires network access. Only markup present in the initial HTML response is
         * visible here; content the page loads later via ajax cannot be retrieved this way.
         *
         * @param keywords the search term; must not be {@code null}. It is URL-encoded as UTF-8
         *                 before being placed in the query string, so non-ASCII terms, spaces
         *                 and reserved characters such as {@code &} are safe to pass.
         * @return one {@link Content} per {@code <li>} under the {@code J_goodsList} element;
         *         an empty list when that element is absent from the page
         * @throws IOException if the page cannot be fetched within the 30 second timeout
         */
        public ArrayList<Content> parseJD(String keywords) throws IOException {
            // Encode the keyword: concatenating it raw breaks the URL for anything but plain ASCII.
            String encodedKeywords = URLEncoder.encode(keywords, StandardCharsets.UTF_8.name());
            String url = "https://search.jd.com/Search?keyword=" + encodedKeywords + "&enc=utf-8";

            // The Document returned by jsoup is the parsed page.
            Document document = Jsoup.parse(new URL(url), 30000);

            ArrayList<Content> goodsList = new ArrayList<>();

            // The product list container may be missing (e.g. a different page was served);
            // report "nothing found" instead of failing with a NullPointerException.
            Element goodsContainer = document.getElementById("J_goodsList");
            if (goodsContainer == null) {
                return goodsList;
            }

            // Each <li> is one product entry.
            for (Element item : goodsContainer.getElementsByTag("li")) {
                Elements firstImage = item.getElementsByTag("img").eq(0);
                String image = firstImage.attr("src");
                if (image.isEmpty()) {
                    // NOTE(review): image-heavy sites lazy-load their pictures; JD appears to keep
                    // the real URL in "data-lazy-img" until then -- confirm against the live page.
                    image = firstImage.attr("data-lazy-img");
                }
                String price = item.getElementsByClass("p-price").eq(0).text();
                String title = item.getElementsByClass("p-name").eq(0).text();

                Content content = new Content();
                content.setTitle(title);
                content.setImg(image);
                content.setPrice(price);
                goodsList.add(content);
            }
            return goodsList;
        }
    }

    service层实现业务编写——将数据存入ES、从ES获取数据

    package com.kuang.kuangshenesjd.service;

    import com.alibaba.fastjson.JSON;
    import com.kuang.kuangshenesjd.pojo.Content;
    import com.kuang.kuangshenesjd.utils.HtmlParseUtil;
    import org.elasticsearch.action.bulk.BulkRequest;
    import org.elasticsearch.action.bulk.BulkResponse;
    import org.elasticsearch.action.index.IndexRequest;
    import org.elasticsearch.action.search.SearchRequest;
    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.common.text.Text;
    import org.elasticsearch.common.unit.TimeValue;
    import org.elasticsearch.common.xcontent.XContentType;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.index.query.TermQueryBuilder;
    import org.elasticsearch.search.SearchHit;
    import org.elasticsearch.search.builder.SearchSourceBuilder;
    import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
    import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.stereotype.Service;

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.TimeUnit;

    /**
     * Business logic: crawls product data into the {@code jd_goods} Elasticsearch index and
     * searches that index by title, with or without highlighting.
     */
    @Service
    public class ContentService {

        private static final String INDEX_NAME = "jd_goods";
        private static final String TITLE_FIELD = "title";

        @Autowired
        private RestHighLevelClient restHighLevelClient;

        /**
         * 1. Crawls the products for {@code keywords} and bulk-indexes them into Elasticsearch.
         *
         * @param keywords the search term handed to {@link HtmlParseUtil#parseJD(String)}
         * @return {@code true} when every document was indexed; {@code false} when the crawl
         *         produced nothing or at least one bulk item failed
         * @throws IOException if crawling or the bulk request fails at the transport level
         */
        public Boolean parseContent(String keywords) throws IOException {
            List<Content> contents = new HtmlParseUtil().parseJD(keywords);
            if (contents.isEmpty()) {
                // A bulk request without any action is rejected by the client, so stop early.
                return false;
            }

            BulkRequest bulkRequest = new BulkRequest();
            bulkRequest.timeout("2m");
            for (Content content : contents) {
                bulkRequest.add(
                        new IndexRequest(INDEX_NAME)
                                .source(JSON.toJSONString(content), XContentType.JSON));
            }

            BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
            // hasFailures() is true when something went wrong; success is its negation.
            return !bulkResponse.hasFailures();
        }

        /**
         * 2. Searches the index for products whose title matches {@code keyword}.
         *
         * @param keyword  the term to look up in the {@code title} field
         * @param pageNo   1-based page number; values below 1 are treated as 1
         * @param pageSize the number of hits per page
         * @return the {@code _source} map of every hit on the requested page
         * @throws IOException if the search request fails
         */
        public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize)
                throws IOException {
            SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
            searchRequest.source(newTitleSearchSource(keyword, pageNo, pageSize));

            SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

            List<Map<String, Object>> list = new ArrayList<>();
            for (SearchHit hit : response.getHits().getHits()) {
                list.add(hit.getSourceAsMap());
            }
            return list;
        }

        /**
         * 3. Same search as {@link #searchPage(String, int, int)}, but the {@code title} of each
         * hit is replaced by its highlighted version (matches wrapped in a red {@code <span>}).
         *
         * @param keyword  the term to look up in the {@code title} field
         * @param pageNo   1-based page number; values below 1 are treated as 1
         * @param pageSize the number of hits per page
         * @return the {@code _source} map of every hit, with {@code title} highlighted when
         *         Elasticsearch returned highlight fragments for it
         * @throws IOException if the search request fails
         */
        public List<Map<String, Object>> searchPageHighlighter(String keyword, int pageNo, int pageSize)
                throws IOException {
            SearchSourceBuilder sourceBuilder = newTitleSearchSource(keyword, pageNo, pageSize);

            HighlightBuilder highlightBuilder = new HighlightBuilder();
            highlightBuilder.field(TITLE_FIELD);
            // false: per the Elasticsearch highlighting docs, highlight the field regardless of
            // whether the query matched specifically on it.
            highlightBuilder.requireFieldMatch(false);
            highlightBuilder.preTags("<span style='color:red'>");
            highlightBuilder.postTags("</span>");
            sourceBuilder.highlighter(highlightBuilder);

            SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
            searchRequest.source(sourceBuilder);
            SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

            List<Map<String, Object>> list = new ArrayList<>();
            for (SearchHit hit : response.getHits().getHits()) {
                Map<String, Object> source = hit.getSourceAsMap(); // the un-highlighted document
                HighlightField highlightedTitle = hit.getHighlightFields().get(TITLE_FIELD);
                if (highlightedTitle != null) {
                    // Swap the plain title for the concatenated highlight fragments.
                    StringBuilder newTitle = new StringBuilder();
                    for (Text fragment : highlightedTitle.fragments()) {
                        newTitle.append(fragment);
                    }
                    source.put(TITLE_FIELD, newTitle.toString());
                }
                list.add(source);
            }
            return list;
        }

        /**
         * Builds the paged term query on {@code title} shared by both search methods.
         *
         * <p>NOTE(review): a term query does not analyze {@code keyword}; if {@code title} is
         * mapped as an analyzed text field, multi-token keywords may not match -- confirm
         * against the index mapping.
         */
        private SearchSourceBuilder newTitleSearchSource(String keyword, int pageNo, int pageSize) {
            int page = Math.max(pageNo, 1);

            SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
            // "from" is the offset of the first hit, not a page number.
            sourceBuilder.from((page - 1) * pageSize);
            sourceBuilder.size(pageSize);

            TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TITLE_FIELD, keyword);
            sourceBuilder.query(termQueryBuilder);
            sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
            return sourceBuilder;
        }
    }

    controller层负责调用业务层(service)

    通过接口的方式爬取数据,并存储在es中

    package com.kuang.kuangshenesjd.controller;

    import com.kuang.kuangshenesjd.service.ContentService;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.stereotype.Controller;
    import org.springframework.ui.Model;
    import org.springframework.web.bind.annotation.GetMapping;
    import org.springframework.web.bind.annotation.PathVariable;
    import org.springframework.web.bind.annotation.ResponseBody;

    import java.io.IOException;
    import java.util.List;
    import java.util.Map;

    /**
     * Web endpoints that delegate to {@link ContentService}: one triggers crawling and indexing,
     * the other runs a highlighted search and renders the {@code index} view.
     */
    @Controller
    public class ContentController {

        @Autowired
        private ContentService contentService;

        /**
         * {@code GET /parse/{keyword}}: hands the keyword to {@code ContentService.parseContent}.
         *
         * @param keyword the search term taken from the path
         * @return the value returned by the service, written directly to the response body
         * @throws IOException propagated from the service
         */
        @GetMapping("/parse/{keyword}")
        @ResponseBody
        public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
            Boolean outcome = contentService.parseContent(keyword);
            return outcome;
        }

        /**
         * {@code GET /search/{keyword}/{pageNo}/{pageSize}}: runs the highlighted search and
         * stores the hits in the model under {@code "list"}.
         *
         * @param keyword  the search term taken from the path
         * @param pageNo   the requested page; {@code 0} is replaced by {@code 1}
         * @param pageSize the number of hits per page
         * @param model    receives the search result
         * @return the view name {@code "index"}
         * @throws IOException propagated from the service
         */
        @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
        public String search(@PathVariable("keyword") String keyword,
                             @PathVariable("pageNo") int pageNo,
                             @PathVariable("pageSize") int pageSize,
                             Model model) throws IOException {
            final int page = (pageNo == 0) ? 1 : pageNo;
            System.out.println(keyword + page + pageSize);

            List<Map<String, Object>> hits =
                    contentService.searchPageHighlighter(keyword, page, pageSize);
            model.addAttribute("list", hits);
            return "index";
        }
    }

    前后端交互

    Processed: 0.023, SQL: 9