package com.example.es.test; import org.apache.http.HttpHost; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.sort.SortBuilders; import org.elasticsearch.search.sort.SortOrder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * @author * @Description es的from-size用法 * @date 2022/01/26 10:04 */ public class ESTest_from_size { public static final Logger logger = LoggerFactory.getLogger(ESTest_searchAfter.class); public static void main(String[] args) throws Exception{ long startTime = System.currentTimeMillis(); // 创建ES客户端 RestHighLevelClient esClient = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http")) ); // 1、创建searchRequest SearchRequest searchRequest = new SearchRequest("audit2"); // 2、指定查询条件 SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();//必须加上track_total_hits,不然就只显示10000 // 页面上的第一页等同于在es中的 0 sourceBuilder.from(0); // 每页多少条数据 sourceBuilder.size(1000); // 设置唯一排序值定位 sourceBuilder.sort(SortBuilders.fieldSort("operationtime").order(SortOrder.DESC)); //将sourceBuilder对象添加到搜索请求中 searchRequest.source(sourceBuilder); // 发送请求 SearchResponse searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT); SearchHit[] hits = searchResponse.getHits().getHits(); List<Map<String, Object>> result = new ArrayList<>(); if (hits != null && hits.length > 0) { for (SearchHit hit : hits) { // 获取需要数据 Map<String, Object> sourceAsMap = hit.getSourceAsMap(); result.add(sourceAsMap); } } logger.info("查询出来的数据个数为:{}", result.size()); // 关闭客户端 esClient.close(); logger.info("运行时间: " + (System.currentTimeMillis() - 
startTime) + "ms"); } }
运行结果:
10:08:40.466 [main] INFO com.example.es.test.ESTest_searchAfter - 查询出来的数据个数为:1000 10:08:40.474 [main] INFO com.example.es.test.ESTest_searchAfter - 运行时间: 1506ms
现象:
如果 from + size 超过 10000 条(即索引设置 index.max_result_window 的默认上限),ES 会直接拒绝请求并报错。该上限可以调大,但深分页的代价会随 offset 线性增长,不建议依赖它。
package com.example.es.test; import org.apache.http.HttpHost; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.sort.SortBuilders; import org.elasticsearch.search.sort.SortOrder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * @author * @Description es 的search_after方法 * @date 2022/01/11 14:04 */ public class ESTest_searchAfter { public static final Logger logger = LoggerFactory.getLogger(ESTest_searchAfter.class); public static void main(String[] args) throws Exception{ long startTime = System.currentTimeMillis(); // 创建ES客户端 RestHighLevelClient esClient = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http")) ); // 1、创建searchRequest SearchRequest searchRequest = new SearchRequest("audit2"); // 2、指定查询条件 SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().trackTotalHits(true);//必须加上track_total_hits,不然就只显示10000 //设置每页查询的数据个数 sourceBuilder.size(1000); // 设置唯一排序值定位 sourceBuilder.sort(SortBuilders.fieldSort("operationtime").order(SortOrder.DESC));//多条件查询 //将sourceBuilder对象添加到搜索请求中 searchRequest.source(sourceBuilder); // 发送请求 SearchResponse searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT); SearchHit[] hits1 = searchResponse.getHits().getHits(); List<Map<String, Object>> result = new ArrayList<>(); if (hits1 != null && hits1.length > 0) { do { for (SearchHit hit : hits1) { // 获取需要数据 Map<String, Object> sourceAsMap = hit.getSourceAsMap(); result.add(sourceAsMap); } // 取得最后得排序值sort,用于记录下次将从这个地方开始取数 SearchHit[] hits = searchResponse.getHits().getHits(); Object[] lastNum = hits[hits.length - 
1].getSortValues(); // 设置searchAfter的最后一个排序值 sourceBuilder.searchAfter(lastNum); searchRequest.source(sourceBuilder); // 进行下次查询 searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT); } while (searchResponse.getHits().getHits().length != 0); } logger.info("查询出来的数据个数为:{}", result.size()); // 关闭客户端 esClient.close(); logger.info("运行时间: " + (System.currentTimeMillis() - startTime) + "ms"); } }
运行结果:
16:11:44.057 [main] INFO com.example.es.test.ESTest_searchAfter - 查询出来的数据个数为:64000 16:11:44.061 [main] INFO com.example.es.test.ESTest_searchAfter - 运行时间: 20979ms
现象:audit2该索引里面总共就69873条数据,控制台打印的信息是每1000条的查询打印出来,最终查询出来64000条记录,还有5873条数据丢失了。还有size如果超过10000,也会报错。丢数据的原因:search_after 是严格从上一页最后一条的排序值"之后"继续取数,而 operationtime 不是唯一值——每页边界上与最后一条时间戳相同的文档全部被跳过了。解决办法是在排序里再加一个唯一字段作 tiebreaker(如 _id 或业务主键),保证每条文档的排序键唯一。
我自己的疑问:search after既然不能跳页查询,只能一页一页的查询出来,那前端调用这个接口后端不是还是返回全部的数据吗?其实不应该这样用:search_after 的正确接法是游标式分页——后端每次只查一页,把本页数据连同最后一条的 sort 值一起返回给前端;前端滚动到下一页时把这个 sort 值作为游标传回来,后端用它 searchAfter 再查下一页。这样前端滚几页后端就只查几页,而不是像本示例这样在一次请求里内部循环把全量数据取完。本示例的写法只适合导出全量数据的场景。
package com.example.es.test; import org.apache.http.HttpHost; import org.elasticsearch.action.search.*; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.sort.SortBuilders; import org.elasticsearch.search.sort.SortOrder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * @author * @Description java 实现scroll滚动查询 * @date 2021/12/08 14:09 */ public class ESTest_Scroll { public static final Logger logger = LoggerFactory.getLogger(ESTest_Scroll.class); public static void main(String[] args) throws Exception{ long startTime = System.currentTimeMillis(); // 创建ES客户端 RestHighLevelClient esClient = new RestHighLevelClient( RestClient.builder(new HttpHost("localhost", 9200, "http")) ); // 1、创建searchRequest SearchRequest searchRequest = new SearchRequest("audit2"); // 2、指定scroll信息 searchRequest.scroll(TimeValue.timeValueMinutes(1L)); // 3、指定查询条件 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(1000); searchSourceBuilder.sort(SortBuilders.fieldSort("operationtime").order(SortOrder.DESC));//多条件查询 searchRequest.source(searchSourceBuilder); //4、获取返回结果scrollId,source SearchResponse searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT); //通过发送初始搜索请求来初始化搜索上下文 String scrollId = searchResponse.getScrollId(); SearchHit[] searchHits = searchResponse.getHits().getHits(); List<Map<String, Object>> result = new ArrayList<>(); for (SearchHit hit: searchHits) { result.add(hit.getSourceAsMap()); } // java也是一样要查询两次,先把我们的首页给查询出来 // 查询出来之后我们要获取他的id // 然后利用他的id去查询他的下一页 while (true) { //5、循环 - 创建 SearchScrollRequest 
创建一个新的搜索滚动请求,保存最后返回的滚动标识符和滚动间隔 // 获取 scrollId 去查询下一页 SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); //6、指定scrollId的生存时间 scrollRequest.scroll(TimeValue.timeValueMinutes(1L)); //7、执行查询获取返回结果 SearchResponse scrollResp = esClient.scroll(scrollRequest, RequestOptions.DEFAULT); //8、判断是否查询到了数据,输出 SearchHit[] hits = scrollResp.getHits().getHits(); //循环输出下一页 if (hits != null && hits.length > 0) { for (SearchHit hit : hits) { result.add(hit.getSourceAsMap()); } } else { //9、判断没有查询到数据,退出循环 break; } } //查完之后我们把存进缓存的id给删除 完成滚动后,清除滚动上下文 //10、创建ClearScrollRequest ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); //11、指定scrollId clearScrollRequest.addScrollId(scrollId); //12、删除scrollId ClearScrollResponse clearScrollResponse = esClient.clearScroll(clearScrollRequest, RequestOptions.DEFAULT); //13、输出结果 boolean succeeded = clearScrollResponse.isSucceeded(); logger.info("删除scrollId:{}", succeeded); logger.info("查询总个数:{}", result.size()); // 关闭客户端 esClient.close(); logger.info("运行时间: " + (System.currentTimeMillis() - startTime) + "ms"); } }
运行结果:
16:20:54.794 [main] INFO com.example.es.test.ESTest_Scroll - 删除scrollId:true 16:20:54.795 [main] INFO com.example.es.test.ESTest_Scroll - 查询总个数:69873 16:20:54.797 [main] INFO com.example.es.test.ESTest_Scroll - 运行时间: 5716ms
现象:
audit2该索引里面总共就69873条数据,最终查询出来69873条记录,一条记录都没有丢失。还有size如果超过10000,也会报错。search after会丢失数据而scroll一条不丢,其实并不奇怪:scroll 在首次请求时对索引做了快照(保留搜索上下文),之后按快照顺序逐批取完,天然无遗漏;而 search_after 是按排序值"之后"续查,排序字段 operationtime 不唯一时,与每页最后一条时间戳相同的文档会被跳过——给排序加上唯一 tiebreaker 字段后,search_after 同样可以做到不丢数据。