SpringBoot整合ES高級查詢方式
- springboot版本:2.0.5.RELEASE
- elasticsearch版本:7.9.1
1、配置
引入依賴:
<dependency> <groupId>org.elasticsearch.client</groupId> <artifactId>elasticsearch-rest-high-level-client</artifactId> <version>7.9.1</version> </dependency> <dependency> <groupId>org.elasticsearch</groupId> <artifactId>elasticsearch</artifactId> <version>7.9.1</version> </dependency>
application.properties 配置文件:
elasticsearch.schema=http elasticsearch.address=192.168.80.130:9200,192.168.80.131:9200,192.168.80.132:9200 elasticsearch.connectTimeout=10000 elasticsearch.socketTimeout=60000 elasticsearch.connectionRequestTimeout=10000 elasticsearch.maxConnectNum=200 elasticsearch.maxConnectPerRoute=200 # 無密碼可忽略 elasticsearch.userName=elastic elasticsearch.password=123456
連接配置:
import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; import org.apache.http.impl.client.BasicCredentialsProvider; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestClientBuilder; import org.elasticsearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import javax.annotation.PreDestroy; import java.io.IOException; import java.util.ArrayList; import java.util.List; @Configuration public class ElasticSearchConfig { /** * 協(xié)議 */ @Value("${elasticsearch.schema:http}") private String schema; /** * 集群地址,如果有多個用“,”隔開 */ @Value("${elasticsearch.address}") private String address; /** * 集群地址,如果有多個用“,”隔開 */ @Value("${elasticsearch.userName}") private String userName; /** * 集群地址,如果有多個用“,”隔開 */ @Value("${elasticsearch.password}") private String password; /** * 連接超時時間 */ @Value("${elasticsearch.connectTimeout:5000}") private int connectTimeout; /** * Socket 連接超時時間 */ @Value("${elasticsearch.socketTimeout:10000}") private int socketTimeout; /** * 獲取連接的超時時間 */ @Value("${elasticsearch.connectionRequestTimeout:5000}") private int connectionRequestTimeout; /** * 最大連接數(shù) */ @Value("${elasticsearch.maxConnectNum:100}") private int maxConnectNum; /** * 最大路由連接數(shù) */ @Value("${elasticsearch.maxConnectPerRoute:100}") private int maxConnectPerRoute; private RestHighLevelClient restHighLevelClient; @Bean public RestHighLevelClient restHighLevelClient() { final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); UsernamePasswordCredentials elastic = new UsernamePasswordCredentials(userName, password); credentialsProvider.setCredentials(AuthScope.ANY,elastic); // 拆分地址 List<HttpHost> hostLists = new ArrayList<>(); String[] hostList = address.split(","); for 
(String addr : hostList) { String host = addr.split(":")[0]; String port = addr.split(":")[1]; hostLists.add(new HttpHost(host, Integer.parseInt(port), schema)); } // 轉(zhuǎn)換成 HttpHost 數(shù)組 HttpHost[] httpHost = hostLists.toArray(new HttpHost[]{}); // 構(gòu)建連接對象 RestClientBuilder builder = RestClient.builder(httpHost); // 異步連接延時配置 builder.setRequestConfigCallback(requestConfigBuilder -> { requestConfigBuilder.setConnectTimeout(connectTimeout); requestConfigBuilder.setSocketTimeout(socketTimeout); requestConfigBuilder.setConnectionRequestTimeout(connectionRequestTimeout); return requestConfigBuilder; }); // 異步連接數(shù)配置 builder.setHttpClientConfigCallback(httpClientBuilder -> { httpClientBuilder.setMaxConnTotal(maxConnectNum); httpClientBuilder.setMaxConnPerRoute(maxConnectPerRoute); httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); return httpClientBuilder; }); restHighLevelClient = new RestHighLevelClient(builder); return restHighLevelClient; } @PreDestroy public void clientClose() { try { this.restHighLevelClient.close(); } catch (IOException e) { e.printStackTrace(); } } }
2、API操作ES
2.1 查詢索引列表
可以模糊匹配索引名稱
@Test public void tset() throws IOException { GetIndexRequest getIndexRequest = new GetIndexRequest("log*"); // 獲取es前綴過濾下所有索引 GetIndexResponse getIndexResponse = restHighLevelClient.indices().get(getIndexRequest, RequestOptions.DEFAULT); // 將es查出的索引轉(zhuǎn)換為list List<String> elasticsearchList = new ArrayList<>(getIndexResponse.getMappings().keySet()); elasticsearchList.forEach(System.out::println); }
2.2 TermsQuery
es 的 term query 做的是精確匹配查詢,關于這里在 serviceName 字段后面加的 .keyword 說明如下:
1.es5.0 及以后的版本取消了 String 類型,將原先的 String 類型拆分為 text 和 keyword 兩種類型。它們的區(qū)別在于 text 會對字段進(jìn)行分詞處理而 keyword 則不會。
2.當(dāng)沒有為索引字段預(yù)先指定 mapping 的話,es 就會使用 Dynamic Mapping ,通過推斷你傳入的文檔中字段的值對字段進(jìn)行動態(tài)映射。例如傳入的文檔中字段 total 的值為12,那么 total 將被映射為 long 類型;字段 addr 的值為"192.168.0.1",那么 addr 將被映射為 ip 類型。然而對于不滿足 ip 和 long 格式的普通字符串來說,情況有些不同:ES 會將它們映射為 text 類型,但為了保留對這些字段做精確查詢以及聚合的能力,又同時對它們做了 keyword 類型的映射,作為該字段的 fields 屬性寫到 _mapping 中。例如,我這里使用的字段 “serviceName”,用來存儲服務(wù)名稱字符串類型,會對它做如下的 Dynamic Mapping:
"serviceName" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 256 } } }
在之后的查詢中使用 serviceName 是將 serviceName 作為 text 類型查詢,而使用 serviceName.keyword 則是將 serviceName 作為 keyword 類型查詢。前者會對查詢內容做分詞處理之后再匹配,而后者則是直接對查詢內容做精確匹配。
3.es 的 term query 做的是精確匹配而不是分詞查詢,因此對 text 類型的字段做 term 查詢將是查不到結果的(除非字段本身經過分詞器處理后不變,未被轉換或分詞)。此時,必須使用 serviceName.keyword 來對 serviceName 字段以 keyword 類型進行精確匹配。
GET logdata-log-center-2021.05.06/_search { "query": { "terms": { "serviceName.keyword": [ "log-center-user-portal", "log-center-collect-manage" ] } } }
Java API
@Test public void test() throws IOException { //構(gòu)建查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); // termQuery只能匹配一個值,第一個入?yún)樽侄蚊Q,第二個參數(shù)為傳入的值,相當(dāng)于sql中的= // searchSourceBuilder.query(QueryBuilders.termQuery("serviceName.keyword", "log-center-user-portal-web")); //termsQuery可以一次性匹配多個值,相當(dāng)于sql中的in searchSourceBuilder.query(QueryBuilders.termsQuery("serviceName.keyword", "log-center-user-portal-web", "log-center-collect-manage")); //構(gòu)建查詢請求對象,入?yún)樗饕? SearchRequest searchRequest = new SearchRequest("log-web-up-log-center-2021.10.30"); //向搜索請求對象中配置搜索源 searchRequest.source(searchSourceBuilder); // 執(zhí)行搜索,向ES發(fā)起http請求 SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); if (RestStatus.OK.equals(response.status())) { long total = response.getHits().getTotalHits().value; //檢索到符合條件的總數(shù) SearchHit[] hits = response.getHits().getHits(); //未指定size,默認(rèn)查詢的是10條 for (SearchHit hit : hits) { String index = hit.getIndex();//索引名稱 String id = hit.getId(); //文檔id JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString(), JSONObject.class); //文檔內(nèi)容 System.out.println(jsonObject); } } }
2.3 WildcardQuery
es的 wildcard query 做的是模糊匹配查詢,類似 sql 中的 like,而 value 值前后的 “*” 號類似于 sql 中的 “%” 。
GET logdata-log-center-2021.05.06/_search { "query": { "wildcard": { "serviceName.keyword": { "value": "*user-portal*" } } } }
Java API
searchSourceBuilder.query(QueryBuilders.wildcardQuery("serviceName.keyword", "*" + "user-portal" + "*"));
2.4 RangeQuery
es 的 range query 做的是范圍查詢,相當(dāng)于 sql 中的 between … and …
GET log-web-up-log-center-2021.10.30/_search { "query": { "range": { "timestamp": { "gte": "2021-10-30 15:00:00", "lte": "2021-10-30 16:00:00", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS" } } } }
Java API
searchSourceBuilder.query(QueryBuilders.rangeQuery("timestamp") .gte("2021-10-30 15:00:00") //起始值 .lte("2021-10-30 16:00:00") //結(jié)束值 .format("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS"));//可以指定多個格式化標(biāo)準(zhǔn),使用||隔開
2.5 MatchQuery
es的 match query 做的是全文檢索,會對關(guān)鍵字進(jìn)行分詞后匹配詞條。
GET log-web-up-log-center-2021.10.30/_search { "query": { "match": { "orgName": { "query": "有限公司" } } } }
query:搜索的關(guān)鍵字,對于英文關(guān)鍵字如果有多個單詞則中間要用半角逗號分隔,而對于中文關(guān)鍵字中間可以用逗號分隔也可以不用。
Java API
//全文檢索,支持分詞匹配 searchSourceBuilder.query(QueryBuilders.matchQuery("orgName", "有限公司");
2.6 MultiMatchQuery
上面的 MatchQuery 有一個短板,假如用戶輸入了某關(guān)鍵字,我們在檢索的時候不知道具體是哪一個字段,這時我們用什么都不合適,而 MultiMatchQuery 的出現(xiàn)解決了這個問題,他可以通過 fields 屬性來設(shè)置多個域聯(lián)合查找,具體用法如下
GET log-web-up-log-center-2021.10.30/_search { "query": { "multi_match": { "query": "user-portal", "fields": ["serviceName", "systemName"] } } }
Java API
//全文檢索,支持分詞匹配,支持多字段檢索 searchSourceBuilder.query(QueryBuilders.multiMatchQuery("user-portal", "serviceName", "systemName", "description"));
2.7 ExistsQuery
es的 exists query 做的是檢索某個字段存在的數(shù)據(jù),即不為 null 的數(shù)據(jù)。其中指定的 field 可以是一個具體的字段,也可以是一個 json 結(jié)構(gòu)。
GET logdata-log-center-2021.05.06/_search { "query": { "exists": { "field": "networkLogDetailInfo" } } }
Java API
//查詢networkLogDetailInfo不為null的數(shù)據(jù) searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo"));
2.8 BoolQuery
es的 bool query 做的是將多個查詢組合起來去檢索數(shù)據(jù),主要的組合參數(shù)有 must、should、mustNot 等。
must:數據必須匹配 must 所包含的查詢條件,相當于 “AND”
should:數據匹配 should 包含的一個或多個查詢條件,相當于 “OR”
mustNot:數據必須不匹配 mustNot 所包含的查詢條件,相當于 “NOT”
GET logdata-log-center-2021.05.06/_search { "query": { "bool": { "must": [ { "exists": { "field": "networkLogDetailInfo" } }, { "range": { "timestamp": { "gte": "2021-05-05 00:00:00", "lte": "2021-05-07 00:00:00", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS" } } } ], "must_not": [ { "exists": { "field": "serviceLogDetailInfo" } } ] } } }
Java API
@Test public void test() throws IOException { //構(gòu)建查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //構(gòu)建bool類型查詢器 BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); //使用must連接,相當(dāng)于and,構(gòu)建第一個查詢條件existsQuery必須包含此字段 boolQueryBuilder.must(QueryBuilders.existsQuery("networkLogDetailInfo")); //使用must連接第二個條件,rangeQuery范圍查找,相當(dāng)于between...and... boolQueryBuilder.must(QueryBuilders.rangeQuery("timestamp") .from("2021-05-05 00:00:00") //起始值 .to("2021-05-07 00:00:00") //結(jié)束值 .includeLower(true) //是否等于起始值 .includeUpper(false) //是否等于結(jié)束值 .format("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd HH:mm:ss.SSS")); //格式化時間 //使用mustNot連接第三個條件 boolQueryBuilder.mustNot(QueryBuilders.existsQuery("serviceLogDetailInfo")); searchSourceBuilder.query(boolQueryBuilder); //構(gòu)建查詢請求對象,入?yún)樗饕? SearchRequest searchRequest = new SearchRequest("logdata-log-center-2021.05.06"); //向搜索請求對象中配置搜索源 searchRequest.source(searchSourceBuilder); // 執(zhí)行搜索,向ES發(fā)起http請求 SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); if (RestStatus.OK.equals(response.status())) { long total = response.getHits().getTotalHits().value; //檢索到符合條件的總數(shù) SearchHit[] hits = response.getHits().getHits(); for (SearchHit hit : hits) { String index = hit.getIndex();//索引名稱 String id = hit.getId(); //文檔id JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString(), JSONObject.class); //文檔內(nèi)容 System.out.println(jsonObject); } } }
2.9 排序
es 使用 sort 進(jìn)行排序,可以多個字段聯(lián)合排序。
GET logdata-log-center-2021.05.06/_search { "query": { "bool": { "must_not": [ { "exists": { "field": "serviceLogDetailInfo" } } ] } }, "sort": [ { "serviceName.keyword": { "order": "asc" }, "timestamp": { "order": "desc" } } ] }
先按照第一個字段排序,第一個字段相同時按照第二個字段排序。
Java API
//升序 searchSourceBuilder.sort("serviceName.keyword", SortOrder.ASC); //降序 searchSourceBuilder.sort("timestamp", SortOrder.DESC);
2.10 結(jié)果字段過濾
檢索數(shù)據(jù),有時只需要其中的幾個字段,es 也支持對結(jié)果集進(jìn)行字段篩選過濾。字段可以使用 “*” 進(jìn)行模糊匹配。
GET logdata-log-center-2021.05.06/_search { "_source": { "includes": ["messageId", "system*", "service*", "timestamp"], "excludes": [] } }
Java API
//篩選字段,第一個參數(shù)為需要的字段,第二個參數(shù)為不需要的字段 searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {});
2.11 分頁
es 的分頁方式有三種:from+ size、scroll、search_after, 默認(rèn)采用的分頁方式是 from+ size 的形式。
2.11.1 from+ size
GET logdata-log-center-2021.05.06/_search { "from": 0, "size": 2, "query": { "exists": { "field": "networkLogDetailInfo" } }, "_source": { "includes": ["messageId", "system*", "service*", "timestamp"], "excludes": [] } }
通過查詢結果可以發現,我們設置了分頁參數之后, hits.total 返回的是數據總數7149,而按照分頁規則,我們設置的size=2,因此 hits.hits 里面只有兩條數據。
Java API
@Test public void test() throws IOException { //構(gòu)建查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //查詢條件 searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo")); int page = 1; // 頁碼 int size = 2; // 每頁顯示的條數(shù) int index = (page - 1) * size; searchSourceBuilder.from(index); //設(shè)置查詢起始位置 searchSourceBuilder.size(size); //結(jié)果集返回的數(shù)據(jù)條數(shù) //篩選字段,第一個參數(shù)為需要的字段,第二個參數(shù)為不需要的字段 searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {}); //構(gòu)建查詢請求對象,入?yún)樗饕? SearchRequest searchRequest = new SearchRequest("logdata-log-center-2021.05.06"); //向搜索請求對象中配置搜索源 searchRequest.source(searchSourceBuilder); // 執(zhí)行搜索,向ES發(fā)起http請求 SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); if (RestStatus.OK.equals(response.status())) { long total = response.getHits().getTotalHits().value; //檢索到符合條件的總數(shù) SearchHit[] hits = response.getHits().getHits(); //未指定size,默認(rèn)查詢的是10條 for (SearchHit hit : hits) { String index = hit.getIndex();//索引名稱 String id = hit.getId(); //文檔id JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString(), JSONObject.class); //文檔內(nèi)容 System.out.println(jsonObject); } } }
2.11.2 scroll
一種可滿足深度分頁的方式,es 提供了 scroll 的方式進行分頁讀取。原理上是對某次查詢生成一個游標 scroll_id,后續的查詢只需要根據這個游標去取數據,每次只能拿到下一頁的數據,直到結果集中返回的 hits 字段為空,就表示遍歷結束。這里 scroll=1m 是 scroll_id 的有效期,表示1分鐘,過期后會被 es 自動清理,每次查詢會更新此值。
GET logdata-log-center-2021.05.06/_search?scroll=1m { "size": 2, "query": { "exists": { "field": "networkLogDetailInfo" } }, "_source": { "includes": ["messageId", "system*", "service*", "timestamp"], "excludes": [] } }
后續(xù)的查詢中查詢條件不需要指定,只需要攜帶 scroll_id 即可它會按照首次查詢條件進(jìn)行分頁展示,下一次查詢(兩種方式):
POST /_search/scroll { "scroll": "1m", "scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFFp0bGhXbjBCQU55Q3EtSDcxaWF4AAAAAACF-OYWV0liWUNLUHVTN09DS1ZtUl9SSHhVdw==" }
GET /_search/scroll?scroll=1m&scroll_id=FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFFp0bGhXbjBCQU55Q3EtSDcxaWF4AAAAAACF-OYWV0liWUNLUHVTN09DS1ZtUl9SSHhVdw==
Java API
public void testScroll(String scrollId) throws IOException { //查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //每頁顯示2條 searchSourceBuilder.size(2); //查詢條件 searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo")); //篩選字段,第一個參數(shù)為需要的字段,第二個參數(shù)為不需要的字段 searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {}); SearchRequest request = new SearchRequest("logdata-log-center-2021.05.06"); request.source(searchSourceBuilder); Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L)); request.scroll(scroll);//滾動翻頁 SearchResponse response; if (!StringUtils.isBlank(scrollId)) { //Scroll查詢 SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); scrollRequest.scroll(scroll); response = restHighLevelClient.scroll(scrollRequest, RequestOptions.DEFAULT); } else { //首次查詢使用普通查詢 response = restHighLevelClient.search(request, RequestOptions.DEFAULT); } //更新scrollId scrollId = response.getScrollId(); System.out.println(scrollId); if (RestStatus.OK.equals(response.status())) { //設(shè)置查詢總量 SearchHit[] hits = response.getHits().getHits(); for (SearchHit hit : hits) { String index = hit.getIndex(); String id = hit.getId(); JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString(), JSONObject.class); System.out.println(jsonObject); } } }
2.11.3 search_after
search_after 是 ES5.0 及之后版本提供的新特性,search_after 查詢時需要指定 sort 排序字段,可以指定多個排序字段,后續查詢有點類似 scroll,但是和 scroll 又不一樣,它提供一個活動的游標,通過上一次查詢的最后一條數據的 sort 值來進行下一次查詢。這里需要說明一下,使用 search_after 查詢需要將 from 設置為0或-1,當然你也可以不寫。
第一次查詢:
POST logdata-log-center-2021.05.06/_search { "size": 2, "query": { "exists": { "field": "networkLogDetailInfo" } }, "_source": { "includes": ["messageId", "system*", "service*", "timestamp"], "excludes": [] }, "sort": [ { "timestamp": { "order": "desc" } } ] }
查詢結果:可以看到每一條數據都有一個 sort 部分,而下一頁的查詢需要本次查詢結果最后一條的 sort 值作為游標,實現分頁查詢。
第二次查詢:
POST logdata-log-center-2021.05.06/_search { "search_after": [ 1620374316433 ], "size": 2, "query": { "exists": { "field": "networkLogDetailInfo" } }, "_source": { "includes": ["messageId", "system*", "service*", "timestamp"], "excludes": [] }, "sort": [ { "timestamp": { "order": "desc" } } ] }
Java API
public void testSearchAfter(Object[] values) throws IOException { //查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(2); searchSourceBuilder.from(0); //searchAfter需要將from設(shè)置為0或-1,當(dāng)然也可以不寫 //查詢條件 searchSourceBuilder.query(QueryBuilders.existsQuery("networkLogDetailInfo")); //篩選字段,第一個參數(shù)為需要的字段,第二個參數(shù)為不需要的字段 searchSourceBuilder.fetchSource(new String[] {"messageId", "system*", "service*", "timestamp"}, new String[] {}); //以時間戳排序 searchSourceBuilder.sort("timestamp", SortOrder.DESC); if (values != null) searchSourceBuilder.searchAfter(values); SearchRequest request = new SearchRequest("logdata-log-center-2021.05.06"); request.source(searchSourceBuilder); SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT); if (RestStatus.OK.equals(response.status())) { //設(shè)置查詢總量 SearchHit[] hits = response.getHits().getHits(); for(int i = 0; i < hits.length; i++) { String index = hits[i].getIndex(); String id = hits[i].getId(); JSONObject jsonObject = JSON.parseObject(hits[i].getSourceAsString(), JSONObject.class); System.out.println(jsonObject); if (i == hits.length-1) { //最后一條數(shù)據(jù)的sortValue作為下一次查詢的游標(biāo)值 values = hits[i].getSortValues(); System.out.println(Arrays.toString(values)); } } } }
2.11.4 三種分頁方式特點
from+size
比較適合淺分頁模式,在深度分頁的情況下,這種使用方式效率是非常低的,隨著分頁頁碼的不斷增大,查詢的效率會直線下降。比如 from = 5000, size = 20,es 需要在各個分片上匹配排序并得到 5000+20 條有效數據,然后在結果集中取最后20條。除了效率上的問題,還有一個無法解決的問題是,es 目前支持最大的 skip 值是 max_result_window ,默認為 10000 。也就是當 from + size > max_result_window 時,es 將返回錯誤。
scroll
是一種滾屏形式的分頁檢索,滿足深度分頁的場景。查詢的時候生成一個游標 scroll_id,有效期內每次返回的值是一樣的,后續的查詢只需要根據這個游標去取數據即可。scroll 查詢是很耗性能的方式,scroll_id 的生成可以理解為建立了一個臨時的歷史快照,系統會耗費大量的資源來保存一份當前查詢結果集映像,并且會占用文件描述符,在此之后的增刪改查等操作不會影響到這個快照的結果,因此不建議在實時查詢中運用。這種方式往往用于非實時處理大量數據的情況,比如要進行數據遷移或者索引變更之類的。
search_after
適用于深度分頁+排序,分頁是根據上一頁最后一條數據來定位下一頁的位置,所以無法跳頁請求,同時在分頁請求的過程中,如果有索引數據的增刪改,這些變更也會實時的反映到游標上。在選擇 search_after 的排序字段時盡量使用比如文檔的 id 或者時間戳等具有唯一性的字段。search_after 相比 from+size 的淺分頁以及 scroll 滾屏查詢會有很大的性能提升。
2.12 聚合
es 的 aggs 對數據進行聚合查詢統計,查詢方式如下:
## 統(tǒng)計各系統(tǒng)一個月的日志采集數(shù)量 POST log*/_search { "size": 0, "query": { "range": { "timestamp": { "gte": "2021-10-24 00:00:00", "lte": "2021-11-24 00:00:00", "format": "yyyy-MM-dd HH:mm:ss" } } }, "aggs": { "allLog": { "terms": { "field": "systemName.keyword", "size": 10 } } } }
Java API
@Test public void test() throws IOException { //按照systemName字段聚合統(tǒng)計各個系統(tǒng)的日志數(shù)量 TermsAggregationBuilder bySystemName = AggregationBuilders.terms("allLog").field("systemName.keyword"); RangeQueryBuilder timestamp = QueryBuilders.rangeQuery("timestamp") .gte("2021-10-24 00:00:00") .lte("2021-11-24 00:00:00") .format("yyyy-MM-dd HH:mm:ss"); //查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //配置聚合條件 searchSourceBuilder.aggregation(bySystemName); //配置查詢條件 searchSourceBuilder.query(timestamp); //設(shè)置查詢結(jié)果不返回,只返回聚合結(jié)果 searchSourceBuilder.size(0); //創(chuàng)建查詢請求對象,將查詢條件配置到其中 SearchRequest request = new SearchRequest("log*"); request.source(searchSourceBuilder); // 執(zhí)行搜索,向ES發(fā)起http請求 SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT); Aggregations aggregations = response.getAggregations(); if (aggregations != null) { Terms terms = aggregations.get("allLog"); //解析桶 for (Terms.Bucket bucket : terms.getBuckets()) { System.out.print("系統(tǒng)名稱:" + bucket.getKeyAsString()); System.out.println("\t總?cè)罩緮?shù)量:" + bucket.getDocCount()); } } }
多層嵌套聚合
## 統(tǒng)計各個系統(tǒng)的總?cè)罩緮?shù)量,按系統(tǒng)統(tǒng)計各種類型日志數(shù)量 POST log*/_search { "size": 0, "query": { "range": { "timestamp": { "gte": "2021-10-24 00:00:00", "lte": "2021-11-24 00:00:00", "format": "yyyy-MM-dd HH:mm:ss" } } }, "aggs": { "allLog": { "terms": { "field": "systemName.keyword", "size": 10 }, "aggs": { "errorLogNum": { "filter": { "terms": { "level.keyword": [ "ERROR", "FATAL" ] } } }, "dbLogNum": { "filter": { "exists": { "field": "dataLogDetailInfo" } } }, "interfaceLogNum": { "filter": { "exists": { "field": "networkLogDetailInfo" } } }, "serviceLogNum": { "filter": { "exists": { "field": "serviceLogDetailInfo" } } }, "webLogNum": { "filter": { "exists": { "field": "browserModel" } } } } } } }
Java API
@Test public void test() throws IOException { //錯誤日志聚合條件 FilterAggregationBuilder errorLogNum = AggregationBuilders.filter("errorLogNum", QueryBuilders.termsQuery("level.keyword", "ERROR", "FATAL")); //數(shù)據(jù)庫日志聚合條件 FilterAggregationBuilder dataLogNum = AggregationBuilders.filter("dbLogNum", QueryBuilders.existsQuery("dataLogDetailInfo")); //接口日志聚合條件 FilterAggregationBuilder networkLogNum = AggregationBuilders.filter("interfaceLogNum", QueryBuilders.existsQuery("networkLogDetailInfo")); //應(yīng)用日志聚合條件 FilterAggregationBuilder serviceLogNum = AggregationBuilders.filter("serviceLogNum", QueryBuilders.existsQuery("serviceLogDetailInfo")); //前端日志聚合條件 FilterAggregationBuilder webUpLogNum = AggregationBuilders.filter("webLogNum", QueryBuilders.existsQuery("browserModel")); //最外層聚合條件,第一次聚合的條件 TermsAggregationBuilder bySystemName = AggregationBuilders.terms("allLog").field("systemName.keyword").size(10); //內(nèi)部多個條件的子聚合,在系統(tǒng)聚合后的結(jié)果上二次聚合 bySystemName.subAggregation(errorLogNum) .subAggregation(dataLogNum). subAggregation(networkLogNum). subAggregation(serviceLogNum). 
subAggregation(webUpLogNum); RangeQueryBuilder timestamp = QueryBuilders.rangeQuery("timestamp") .gte("2021-10-24 00:00:00") .lte("2021-11-24 00:00:00") .format("yyyy-MM-dd HH:mm:ss"); //查詢源構(gòu)建器 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); //配置聚合條件 searchSourceBuilder.aggregation(bySystemName); //配置查詢條件 searchSourceBuilder.query(timestamp); //設(shè)置查詢結(jié)果不返回,只返回聚合結(jié)果 searchSourceBuilder.size(0); //創(chuàng)建查詢請求對象,將查詢條件配置到其中 SearchRequest request = new SearchRequest("log*"); request.source(searchSourceBuilder); // 執(zhí)行搜索,向ES發(fā)起http請求 SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT); Aggregations aggregations = response.getAggregations(); if (aggregations != null) { Terms terms = aggregations.get("allLog"); for (Terms.Bucket bucket : terms.getBuckets()) { ParsedFilter dbFilter = bucket.getAggregations().get("dbLogNum"); ParsedFilter serviceFilter = bucket.getAggregations().get("serviceLogNum"); ParsedFilter webFilter = bucket.getAggregations().get("webLogNum"); ParsedFilter interfaceFilter = bucket.getAggregations().get("interfaceLogNum"); ParsedFilter errorFilter = bucket.getAggregations().get("errorLogNum"); System.out.print("系統(tǒng)名稱:" + bucket.getKeyAsString()); System.out.print("\t總?cè)罩荆? + bucket.getDocCount()); System.out.print("\t數(shù)據(jù)庫日志:" + dbFilter.getDocCount()); System.out.print("\t服務(wù)執(zhí)行日志:" + serviceFilter.getDocCount()); System.out.print("\t前端操作日志:" + webFilter.getDocCount()); System.out.print("\t接口日志:" + interfaceFilter.getDocCount()); System.out.println("\t錯誤日志:" + errorFilter.getDocCount()); } } }
聚合查詢還提供了許多查詢規則,按時間 date 聚合、count 聚合、avg 聚合、sum 聚合、min 聚合、max 聚合等等,這里就不一一列舉了。
以上為個人經(jīng)驗,希望能給大家一個參考,也希望大家多多支持腳本之家。
相關(guān)文章
vscode 配置java環(huán)境并調(diào)試運(yùn)行的詳細(xì)過程
這篇文章主要介紹了vscode 配置java環(huán)境并調(diào)試運(yùn)行的詳細(xì)過程,本文給大家介紹的非常詳細(xì),對大家的學(xué)習(xí)或工作具有一定的參考借鑒價值,需要的朋友可以參考下2021-05-05Java中SimpleDateFormat 格式化日期的使用
本文主要介紹了Java中SimpleDateFormat 格式化日期的使用,文中通過示例代碼介紹的非常詳細(xì),具有一定的參考價值,感興趣的小伙伴們可以參考一下2022-03-03java進(jìn)制轉(zhuǎn)換工具類實現(xiàn)減少參數(shù)長度
這篇文章主要為大家介紹了java進(jìn)制轉(zhuǎn)換工具類實現(xiàn)減少參數(shù)長度示例詳解,有需要的朋友可以借鑒參考下,希望能夠有所幫助,祝大家多多進(jìn)步,早日升職加薪2023-02-02Java實現(xiàn)List反轉(zhuǎn)的方法總結(jié)
在Java中,反轉(zhuǎn)一個List意味著將其元素的順序顛倒,使得第一個元素變成最后一個,最后一個元素變成第一個,依此類推,這一操作在處理數(shù)據(jù)集合時非常有用,所以本文給大家總結(jié)了Java實現(xiàn)List反轉(zhuǎn)的方法,需要的朋友可以參考下2024-04-04