Elasticsearch搜索功能的实现(五)– 实战

实战环境

Elasticsearch 8.5.0 + Kibana 8.5.0 + Spring Boot 3.0.2 + Spring Data Elasticsearch 5.0.2 + JDK 17

一、集成 spring data elasticsearch

1 添加依赖

<!-- Spring Boot starter that brings in Spring Data Elasticsearch -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>

2 配置es连接

/**
 * Spring configuration that supplies the Elasticsearch client connection settings.
 */
@Configuration
public class ElasticsearchConfig extends ElasticsearchConfiguration {

    /** host:port of the Elasticsearch node to connect to. */
    private static final String HOST_AND_PORT = "127.0.0.1:9200";

    /** User name used for HTTP basic authentication. */
    private static final String USERNAME = "elastic";

    /** Password used for HTTP basic authentication (masked in the article). */
    private static final String PASSWORD = "********";

    /**
     * Builds the client configuration: a single node plus basic-auth credentials.
     *
     * @return the client configuration used to create the Elasticsearch client
     */
    @Override
    public ClientConfiguration clientConfiguration() {
        return ClientConfiguration.builder()
                .connectedTo(HOST_AND_PORT)
                .withBasicAuth(USERNAME, PASSWORD)
                .build();
    }
}

3 配置打印DSL语句

# Logging configuration
logging:
  level:
    # Elasticsearch client logging (used in this article to print the generated DSL)
    org.springframework.data.elasticsearch.client.WIRE : trace

二、index及mapping 文件编写

@Data @Document(indexName = "news") //索引名 @Setting(shards = 1,replicas = 0,refreshInterval = "1s") //shards 分片数 replicas 副本数 @Schema(name = "News",description = "新闻对象") public class News implements Serializable {      @Id  //索引主键     @NotBlank(message = "新闻ID不能为空")     @Schema(type = "integer",description = "新闻ID",example = "1")     private Integer id;      @NotBlank(message = "新闻标题不能为空")     @Schema(type = "String",description = "新闻标题")     @MultiField(mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart"),             otherFields = {@InnerField(type = FieldType.Keyword, suffix = "keyword") }) //混合类型字段 指定 建立索引时分词器与搜索时入参分词器     private String title;      @Schema(type = "LocalDate",description = "发布时间")     @Field(type = FieldType.Date,format = DateFormat.date)     private LocalDate pubDate;      @Schema(type = "String",description = "来源")     @Field(type = FieldType.Keyword)     private String source;      @Schema(type = "String",description = "行业类型代码",example = "1,2,3")     @Field(type = FieldType.Text,analyzer = "ik_max_word",searchAnalyzer = "ik_smart")     private String industry;      @Schema(type = "String",description = "预警类型")     @Field(type = FieldType.Keyword)     private String type;      @Schema(type = "String",description = "涉及公司")     @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")     private String companies;      @Schema(type = "String",description = "新闻内容")     @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")     private String content;  } 

三、DAO层编写

/**
 * Spring Data repository for {@link News} documents, keyed by {@code Integer} id.
 */
@Repository
public interface NewsRepository extends ElasticsearchRepository<News, Integer> {

    /**
     * Derived query: finds news whose {@code type} equals the given value.
     *
     * @param type     value to match against the {@code type} field
     * @param pageable paging and sorting information
     * @return one page of matching news
     */
    Page<News> findByType(String type, Pageable pageable);
}

四、简单功能实现

4.1 简单功能写法

    /**      * 新增新闻      * @param news      * @return      */     @Override     public void saveNews(News news) {         newsRepository.save(news);     }      /**      * 删除新闻      * @param newsId      */     @Override     public void delete(Integer newsId) {         newsRepository.deleteById(newsId);     }      /**      * 删除新闻索引      */     @Override     public void deleteIndex() {         operations.indexOps(News.class).delete();     }      /**      * 创建索引      */     @Override     public void createIndex() {         operations.indexOps(News.class).createWithMapping();     }      @Override     public PageResult findByType(String type) {         // 先发布日期排序         Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"));         Pageable pageable = PageRequest.of(0,10,sort);         final Page<News> newsPage = newsRepository.findByType(type, pageable);         return new PageResult(newsPage.getTotalElements(),newsPage.getContent());      } 

实现效果图片:
Elasticsearch搜索功能的实现(五)-- 实战

实际执行的DSL语句:
Elasticsearch搜索功能的实现(五)-- 实战

注意: 当指定了排序条件且排序字段中不包含 _score 时, Elasticsearch 默认不再计算得分, 返回结果中的 _score 为 null (如需保留得分, 可将 _score 加入排序条件或开启 track_scores)

4.2 搜索功能的实现

    @Override     public PageResult searchNews(NewsPageSearch search) {          //创建原生查询DSL对象         final NativeQueryBuilder nativeQueryBuilder = new NativeQueryBuilder();          // 先发布日期再得分排序         Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"),new Order(Sort.Direction.DESC, "_score"));          Pageable pageable = PageRequest.of(search.getCurPage(), search.getPageSize(),sort);           final BoolQuery.Builder boolBuilder = new BoolQuery.Builder();         //过滤条件         setFilter(search, boolBuilder);          //关键字搜索         if (StringUtils.isNotBlank(search.getKeyword())){             setKeyWordAndHighlightField(search, nativeQueryBuilder, boolBuilder);         }else {             nativeQueryBuilder.withQuery(q -> q.bool(boolBuilder.build()));         }          nativeQueryBuilder.withPageable(pageable);          SearchHits<News> searchHits = operations.search(nativeQueryBuilder.build(), News.class);         //高亮回填封装         final List<News> newsList = searchHits.getSearchHits().stream()                 .map(s -> {                     final News content = s.getContent();                     final List<String> title = s.getHighlightFields().get("title");                     final List<String> contentList = s.getHighlightFields().get("content");                     if (!CollectionUtils.isEmpty(title)){                         s.getContent().setTitle(title.get(0));                     }                     if (!CollectionUtils.isEmpty(contentList)){                         s.getContent().setContent(contentList.get(0));                     }                     return content;                  }).collect(Collectors.toList());          return new PageResult<News>(searchHits.getTotalHits(),newsList);      }      /**      * 设置过滤条件 行业类型 来源 预警类型      * @param search      * @param boolBuilder      */     private void setFilter(NewsPageSearch search, BoolQuery.Builder boolBuilder) {         //行业类型         
if(StringUtils.isNotBlank(search.getIndustry())){             // 按逗号拆分             List<Query> industryQueries = Arrays.asList(search.getIndustry().split(",")).stream().map(p -> {                 Query.Builder queryBuilder = new Query.Builder();                 queryBuilder.term(t -> t.field("industry").value(p));                 return queryBuilder.build();             }).collect(Collectors.toList());             boolBuilder.filter(f -> f.bool(t -> t.should(industryQueries)));         }         // 来源         if(StringUtils.isNotBlank(search.getSource())){             // 按逗号拆分             List<Query> sourceQueries = Arrays.asList(search.getSource().split(",")).stream().map(p -> {                 Query.Builder queryBuilder = new Query.Builder();                 queryBuilder.term(t -> t.field("source").value(p));                 return queryBuilder.build();             }).collect(Collectors.toList());             boolBuilder.filter(f -> f.bool(t -> t.should(sourceQueries)));         }         // 预警类型         if(StringUtils.isNotBlank(search.getType())){             // 按逗号拆分             List<Query> typeQueries = Arrays.asList(search.getType().split(",")).stream().map(p -> {                 Query.Builder queryBuilder = new Query.Builder();                 queryBuilder.term(t -> t.field("type").value(p));                 return queryBuilder.build();             }).collect(Collectors.toList());             boolBuilder.filter(f -> f.bool(t -> t.should(typeQueries)));         }          //范围区间         if (StringUtils.isNotBlank(search.getStartDate())){             boolBuilder.filter(f -> f.range(r -> r.field("pubDate")                     .gte(JsonData.of(search.getStartDate()))                     .lte(JsonData.of(search.getEndDate()))));         }     }      /**      * 关键字搜索 title 权重更高      * 高亮字段  title 、content      * @param search      * @param nativeQueryBuilder      * @param boolBuilder      */     private void setKeyWordAndHighlightField(NewsPageSearch search, 
NativeQueryBuilder nativeQueryBuilder, BoolQuery.Builder boolBuilder) {         final String keyword = search.getKeyword();         //查询条件         boolBuilder.must(b -> b.multiMatch(m -> m.fields("title","content","companies").query(keyword)));          //高亮         final HighlightFieldParameters.HighlightFieldParametersBuilder builder = HighlightFieldParameters.builder();         builder.withPreTags("<font color='red'>")                 .withPostTags("</font>")                 .withRequireFieldMatch(true) //匹配才加标签                 .withNumberOfFragments(0); //显示全文         final HighlightField titleHighlightField = new HighlightField("title", builder.build());         final HighlightField contentHighlightField = new HighlightField("content", builder.build());         final Highlight titleHighlight = new Highlight(List.of(titleHighlightField,contentHighlightField));          nativeQueryBuilder.withQuery(                         f -> f.functionScore(                                 fs -> fs.query(q -> q.bool(boolBuilder.build()))                                         .functions( FunctionScore.of(func -> func.filter(                                                         fq -> fq.match(ft -> ft.field("title").query(keyword))).weight(100.0)),                                                 FunctionScore.of(func -> func.filter(                                                         fq -> fq.match(ft -> ft.field("content").query(keyword))).weight(20.0)),                                                 FunctionScore.of(func -> func.filter(                                                         fq -> fq.match(ft -> ft.field("companies").query(keyword))).weight(10.0)))                                         .scoreMode(FunctionScoreMode.Sum)                                         .boostMode(FunctionBoostMode.Sum)                                         .minScore(1.0)))                 .withHighlightQuery(new HighlightQuery(titleHighlight,News.class));      } 

实现效果

加权前效果:
Elasticsearch搜索功能的实现(五)-- 实战

加权后效果:
Elasticsearch搜索功能的实现(五)-- 实战

DSL 语句:

{ 	"from": 0, 	"size": 6, 	"sort": [{ 		"pubDate": { 			"mode": "min", 			"order": "desc" 		} 	}, { 		"_score": { 			"order": "desc" 		} 	}], 	"highlight": { 		"fields": { 			"title": { 				"number_of_fragments": 0, 				"post_tags": ["</font>"], 				"pre_tags": ["<font color='red'>"] 			}, 			"content": { 				"number_of_fragments": 0, 				"post_tags": ["</font>"], 				"pre_tags": ["<font color='red'>"] 			} 		} 	}, 	"query": { 		"function_score": { 			"boost_mode": "sum", 			"functions": [{ 				"filter": { 					"match": { 						"title": { 							"query": "立足优势稳住外贸基本盘" 						} 					} 				}, 				"weight": 100.0 			}, { 				"filter": { 					"match": { 						"content": { 							"query": "立足优势稳住外贸基本盘" 						} 					} 				}, 				"weight": 20.0 			}, { 				"filter": { 					"match": { 						"companies": { 							"query": "立足优势稳住外贸基本盘" 						} 					} 				}, 				"weight": 10.0 			}], 			"min_score": 1.0, 			"query": { 				"bool": { 					"filter": [{ 						"bool": { 							"should": [{ 								"term": { 									"industry": { 										"value": "1" 									} 								} 							}, { 								"term": { 									"industry": { 										"value": "2" 									} 								} 							}, { 								"term": { 									"industry": { 										"value": "3" 									} 								} 							}] 						} 					}, { 						"bool": { 							"should": [{ 								"term": { 									"source": { 										"value": "新华社" 									} 								} 							}, { 								"term": { 									"source": { 										"value": "中国经济网" 									} 								} 							}] 						} 					}, { 						"bool": { 							"should": [{ 								"term": { 									"type": { 										"value": "经济简报" 									} 								} 							}, { 								"term": { 									"type": { 										"value": "外贸简报" 									} 								} 							}] 						} 					}, { 						"range": { 							"pubDate": { 								"gte": "2023-03-29", 								"lte": "2023-03-30" 							} 						} 					}], 					"must": [{ 						"multi_match": { 							"fields": ["title", "content", "companies"], 	
						"query": "立足优势稳住外贸基本盘" 						} 					}] 				} 			}, 			"score_mode": "sum" 		} 	}, 	"track_scores": false, 	"version": true } 

4.3 接口测试

Elasticsearch搜索功能的实现(五)-- 实战

发表评论

评论已关闭。

相关文章