文章目录
- 配置信息
- ElasticSearch配置
- service层实现业务编写——ES存储数据、获取ES数据
- controller层负责调用业务层(service)
- 前后端交互
代码
配置信息
```properties
server.port=9090
# thymeleaf
spring.thymeleaf.cache=false
```
ElasticSearch配置
/src/main/java/com/kuang/kuangshenesjd/config/ElasticSearchClientConfig.java
```java
package com.kuang.kuangshenesjd.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that registers the Elasticsearch high-level REST client as a bean.
 *
 * <p>Author's notes on wiring a third-party object into Spring:
 * <ol>
 *   <li>find the object;</li>
 *   <li>put it into the Spring container;</li>
 *   <li>with Spring Boot, read the source first
 *       (the {@code xxxAutoConfiguration} / {@code xxxProperties} classes).</li>
 * </ol>
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds the client used to talk to Elasticsearch.
     *
     * @return a client pointed at {@code http://127.0.0.1:9200}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Single node, address hard-coded to the local machine.
        HttpHost localNode = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
```
项目结构
```
├── README.md
├── kuangshen-es-jd.iml
├── mvnw
├── mvnw.cmd
├── pom.xml
├── src
│   ├── main
│   │   ├── java
│   │   │   └── com
│   │   │       └── kuang
│   │   │           └── kuangshenesjd
│   │   │               ├── KuangshenEsJdApplication.java
│   │   │               ├── config
│   │   │               │   └── ElasticSearchClientConfig.java
│   │   │               ├── controller
│   │   │               │   ├── ContentController.java
│   │   │               │   └── IndexController.java
│   │   │               ├── pojo
│   │   │               │   └── Content.java
│   │   │               ├── service
│   │   │               │   └── ContentService.java
│   │   │               └── utils
│   │   │                   └── HtmlParseUtil.java
│   │   └── resources
│   │       ├── application.properties
│   │       ├── static
│   │       │   ├── css
│   │       │   │   └── style.css
│   │       │   ├── images
│   │       │   │   └── jdlogo.png
│   │       │   └── js
│   │       │       └── jquery.min.js
│   │       └── templates
│   │           └── index.html
│   └── test
│       └── java
│           └── com
│               └── kuang
│                   └── kuangshenesjd
│                       └── KuangshenEsJdApplicationTests.java
└── target
    ├── classes
    │   ├── application.properties
    │   ├── com
    │   │   └── kuang
    │   │       └── kuangshenesjd
    │   │           ├── KuangshenEsJdApplication.class
    │   │           ├── config
    │   │           │   └── ElasticSearchClientConfig.class
    │   │           ├── controller
    │   │           │   ├── ContentController.class
    │   │           │   └── IndexController.class
    │   │           ├── pojo
    │   │           │   └── Content.class
    │   │           ├── service
    │   │           │   └── ContentService.class
    │   │           └── utils
    │   │               └── HtmlParseUtil.class
    │   ├── static
    │   │   ├── css
    │   │   │   └── style.css
    │   │   ├── images
    │   │   │   └── jdlogo.png
    │   │   └── js
    │   │       └── jquery.min.js
    │   └── templates
    │       └── index.html
    ├── generated-sources
    │   └── annotations
    ├── generated-test-sources
    │   └── test-annotations
    └── test-classes
        └── com
            └── kuang
                └── kuangshenesjd
                    └── KuangshenEsJdApplicationTests.class

45 directories, 31 files
```
# 爬取数据
数据从哪里来?可以从数据库中获取,也可以从消息队列中获取,这些都可以称为数据源;也可以用爬虫抓取。(当前只需要少量数据进行测试,所以项目中先通过爬虫抓取并解析数据。)
## 爬取数据:
```java
package com.kuang.kuangshenesjd.utils;
import com.kuang.kuangshenesjd.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
/**
 * Scrapes product listings from the JD search result page and maps each list
 * item to a {@link Content}.
 */
@Component
public class HtmlParseUtil {

    /** Prefix of the JD search URL; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL_PREFIX = "https://search.jd.com/Search?keyword=";

    /** Timeout, in milliseconds, passed to Jsoup when fetching the page. */
    private static final int FETCH_TIMEOUT_MILLIS = 30000;

    /** Manual smoke test: scrapes the results for "java" and prints every item. */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJD("java").forEach(System.out::println);
    }

    /**
     * Fetches the JD search page for the given keyword and extracts image, price
     * and title from every {@code <li>} under the {@code J_goodsList} element.
     *
     * @param keywords search keyword; it is URL-encoded before being placed in the query string
     * @return the extracted items; empty when the page has no {@code J_goodsList} element
     * @throws IllegalArgumentException if {@code keywords} is {@code null}
     * @throws IOException if the page cannot be fetched or parsed
     */
    public ArrayList<Content> parseJD(String keywords) throws IOException {
        if (keywords == null) {
            throw new IllegalArgumentException("keywords must not be null");
        }

        // Encode the keyword so that spaces, '&', '#', '%' and non-ASCII text
        // cannot corrupt the query string. Fully qualified to avoid touching
        // the file's import list.
        String encodedKeywords = java.net.URLEncoder.encode(keywords, "UTF-8");
        String url = SEARCH_URL_PREFIX + encodedKeywords + "&enc=utf-8";

        // Requires network access; content loaded later via ajax is not part of
        // the document returned here.
        Document document = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);

        ArrayList<Content> goodsList = new ArrayList<>();

        // getElementById returns null when the element is absent; treat that as
        // "no results" instead of failing with a NullPointerException.
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            return goodsList;
        }

        // Each <li> is one product entry.
        Elements elements = element.getElementsByTag("li");
        for (Element el : elements) {
            // NOTE(review): image-heavy sites usually lazy-load pictures, so "src"
            // may be empty or a placeholder here - confirm whether a lazy-load
            // data attribute should be read instead.
            String image = el.getElementsByTag("img").eq(0).attr("src");
            String price = el.getElementsByClass("p-price").eq(0).text();
            String title = el.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(image);
            content.setPrice(price);
            goodsList.add(content);

            System.out.println("=========================");
            System.out.println(image);
            System.out.println(price);
            System.out.println(title);
        }
        return goodsList;
    }
}
```
service层实现业务编写——ES存储数据、获取ES数据
package com
.kuang
.kuangshenesjd
.service
;
import com
.alibaba
.fastjson
.JSON
;
import com
.kuang
.kuangshenesjd
.pojo
.Content
;
import com
.kuang
.kuangshenesjd
.utils
.HtmlParseUtil
;
import org
.elasticsearch
.action
.bulk
.BulkRequest
;
import org
.elasticsearch
.action
.bulk
.BulkResponse
;
import org
.elasticsearch
.action
.index
.IndexRequest
;
import org
.elasticsearch
.action
.search
.SearchRequest
;
import org
.elasticsearch
.action
.search
.SearchResponse
;
import org
.elasticsearch
.client
.RequestOptions
;
import org
.elasticsearch
.client
.RestHighLevelClient
;
import org
.elasticsearch
.common
.text
.Text
;
import org
.elasticsearch
.common
.unit
.TimeValue
;
import org
.elasticsearch
.common
.xcontent
.XContentType
;
import org
.elasticsearch
.index
.query
.QueryBuilders
;
import org
.elasticsearch
.index
.query
.TermQueryBuilder
;
import org
.elasticsearch
.search
.SearchHit
;
import org
.elasticsearch
.search
.builder
.SearchSourceBuilder
;
import org
.elasticsearch
.search
.fetch
.subphase
.highlight
.HighlightBuilder
;
import org
.elasticsearch
.search
.fetch
.subphase
.highlight
.HighlightField
;
import org
.springframework
.beans
.factory
.annotation
.Autowired
;
import org
.springframework
.stereotype
.Service
;
import java
.io
.IOException
;
import java
.util
.ArrayList
;
import java
.util
.List
;
import java
.util
.Map
;
import java
.util
.concurrent
.TimeUnit
;
/**
 * Business layer: stores scraped product data in Elasticsearch and reads it
 * back with paged term queries (optionally with highlighted titles).
 */
@Service
public class ContentService {

    /** Name of the Elasticsearch index that holds the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    /** Document field that is searched and highlighted. */
    private static final String TITLE_FIELD = "title";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for a keyword and bulk-indexes them into {@code jd_goods}.
     *
     * @param keywords keyword handed to {@link HtmlParseUtil#parseJD(String)}
     * @return {@code true} when at least one item was scraped and the bulk request
     *         completed without failures; {@code false} otherwise
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        List<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // Nothing to index; a bulk request without any action is rejected
            // by request validation, so report "not stored" instead.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }

        BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        // hasFailures() is true when something went wrong, so success is its negation.
        return !bulkResponse.hasFailures();
    }

    /**
     * Runs a paged term query on the title field.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(buildPageQuery(keyword, pageNo, pageSize));

        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }

    /**
     * Same query as {@link #searchPage(String, int, int)}, but the {@code title}
     * value of each hit is replaced by its highlighted form (matches wrapped in
     * a red {@code <span>}) when highlight data is present.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, with highlighted titles
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> searchPageHighlighter(String keyword, int pageNo, int pageSize) throws IOException {
        SearchSourceBuilder sourceBuilder = buildPageQuery(keyword, pageNo, pageSize);

        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field(TITLE_FIELD);
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        searchRequest.source(sourceBuilder);

        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : response.getHits().getHits()) {
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            HighlightField title = highlightFields.get(TITLE_FIELD);
            Map<String, Object> map = hit.getSourceAsMap();
            if (title != null) {
                // Join all highlighted fragments into the replacement title.
                StringBuilder newTitle = new StringBuilder();
                for (Text text : title.fragments()) {
                    newTitle.append(text);
                }
                map.put(TITLE_FIELD, newTitle.toString());
            }
            list.add(map);
        }
        return list;
    }

    /** Builds the paged term query on the title field shared by both search methods. */
    private SearchSourceBuilder buildPageQuery(String keyword, int pageNo, int pageSize) {
        int page = Math.max(pageNo, 1);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // "from" is a hit offset, not a page number: page 1 starts at offset 0.
        sourceBuilder.from((page - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // NOTE(review): a term query is not analyzed; whether multi-character
        // keywords match depends on how the title field is mapped - confirm.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TITLE_FIELD, keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        return sourceBuilder;
    }
}
controller层负责调用业务层(service)
通过接口的方式爬取数据,并存储在es中
package com
.kuang
.kuangshenesjd
.controller
;
import com
.kuang
.kuangshenesjd
.service
.ContentService
;
import org
.springframework
.beans
.factory
.annotation
.Autowired
;
import org
.springframework
.stereotype
.Controller
;
import org
.springframework
.ui
.Model
;
import org
.springframework
.web
.bind
.annotation
.GetMapping
;
import org
.springframework
.web
.bind
.annotation
.PathVariable
;
import org
.springframework
.web
.bind
.annotation
.ResponseBody
;
import java
.io
.IOException
;
import java
.util
.List
;
import java
.util
.Map
;
/**
 * Web layer: exposes the scraping and search operations of {@link ContentService}.
 */
@Controller
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Triggers scraping and storage for the keyword taken from the path.
     *
     * @param keyword keyword forwarded to {@code ContentService.parseContent}
     * @return the value returned by {@code ContentService.parseContent}, written to the response body
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean outcome = contentService.parseContent(keyword);
        return outcome;
    }

    /**
     * Runs the highlighted search and exposes the hits to the view as model attribute {@code list}.
     *
     * @param keyword  search keyword from the path
     * @param pageNo   page number from the path; 0 is replaced by 1
     * @param pageSize page size from the path
     * @param model    model that receives the {@code list} attribute
     * @return the view name {@code index}
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public String search(@PathVariable("keyword") String keyword,
                         @PathVariable("pageNo") int pageNo,
                         @PathVariable("pageSize") int pageSize,
                         Model model) throws IOException {
        final int effectivePage = (pageNo == 0) ? 1 : pageNo;

        // Console trace of the request parameters, concatenated without separators.
        System.out.println(keyword + effectivePage + pageSize);

        List<Map<String, Object>> hits =
                contentService.searchPageHighlighter(keyword, effectivePage, pageSize);
        model.addAttribute("list", hits);
        return "index";
    }
}
前后端交互