ElasticSearch7.3学习(二十七)——聚合概念(bucket和metric)及其示例

一、两个核心概念:bucket和metric

1.1 bucket

有如下数据

city name 
北京 张三 
北京 李四
天津 王五
天津 赵六
天津 王麻子

按照city划分出来两个bucket,一个是北京bucket,一个是天津bucket

北京bucket:包含了2个人,张三,李四

天津bucket:包含了3个人,王五,赵六,王麻子

1.2 metric

metric,就是对一个bucket执行的某种聚合分析的操作,比如说求平均值,求最大值,求最小值

比如下面的一个sql语句

select count(*) from book group by studymodel

bucket:group by studymodel --> 那些studymodel相同的数据,就会被划分到一个bucket中

metric:count(*),对每个bucket中所有的数据,计算一个数量。类似的metric操作还有avg(),sum(),max(),min()

二、聚合示例

2.1 数据准备

首先创建book索引

PUT /book/ {   "settings": {     "number_of_shards": 1,     "number_of_replicas": 0   },   "mappings": {     "properties": {       "name": {         "type": "text",         "analyzer": "ik_max_word",         "search_analyzer": "ik_smart"       },       "description": {         "type": "text",         "analyzer": "ik_max_word",         "search_analyzer": "ik_smart"       },       "studymodel": {         "type": "keyword"       },       "price": {         "type": "double"       },       "timestamp": {         "type": "date",         "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"       },       "pic": {         "type": "text",         "index": false       }     }   } }

添加测试数据

PUT /book/_doc/1 {   "name": "Bootstrap开发",   "description": "Bootstrap是一个非常流行的开发框架。此开发框架可以帮助不擅长css页面开发的程序人员轻松的实现一个css,不受浏览器限制的精美界面css效果。",   "studymodel": "201002",   "price": 38.6,   "timestamp": "2019-08-25 19:11:35",   "pic": "group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg",   "tags": [     "bootstrap",     "dev"   ] }  PUT /book/_doc/2 {   "name": "java编程思想",   "description": "java语言是世界第一编程语言,在软件开发领域使用人数最多。",   "studymodel": "201001",   "price": 68.6,   "timestamp": "2019-08-25 19:11:35",   "pic": "group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg",   "tags": [     "java",     "dev"   ] }  PUT /book/_doc/3 {   "name": "spring开发基础",   "description": "spring 在java领域非常流行,java程序员都在用。",   "studymodel": "201001",   "price": 88.6,   "timestamp": "2019-08-24 19:11:35",   "pic": "group1/M00/00/00/wKhlQFs6RCeAY0pHAAJx5ZjNDEM428.jpg",   "tags": [     "spring",     "java"   ] }

2.2 计算每个studymodel下的商品数量

sql语句: select studymodel,count(*) from book group by studymodel

"size": 0,   ==>  作用:只需要返回聚合结果,不需要返回查询命中的文档(hits)

GET /book/_search {   "size": 0,   "query": {     "match_all": {}   },   "aggs": {     "group_by_model": {       "terms": {         "field": "studymodel"       }     }   } }

结果:

{   "took" : 2,   "timed_out" : false,   "_shards" : {     "total" : 1,     "successful" : 1,     "skipped" : 0,     "failed" : 0   },   "hits" : {     "total" : {       "value" : 3,       "relation" : "eq"     },     "max_score" : null,     "hits" : [ ]   },   "aggregations" : {     "group_by_model" : {       "doc_count_error_upper_bound" : 0,       "sum_other_doc_count" : 0,       "buckets" : [         {           "key" : "201001",           "doc_count" : 2         },         {           "key" : "201002",           "doc_count" : 1         }       ]     }   } }

2.3 计算每个tags下的商品数量

tags是text类型字段,text字段默认禁用fielddata,直接对它做terms聚合会报错,所以需要先设置"fielddata": true

PUT /book/_mapping/ {   "properties": {     "tags": {       "type": "text",       "fielddata": true     }   } }

查询

GET /book/_search {   "size": 0,    "query": {     "match_all": {}   },    "aggs": {     "group_by_tags": {       "terms": { "field": "tags" }     }   } }

结果:

{   "took" : 2,   "timed_out" : false,   "_shards" : {     "total" : 1,     "successful" : 1,     "skipped" : 0,     "failed" : 0   },   "hits" : {     "total" : {       "value" : 3,       "relation" : "eq"     },     "max_score" : null,     "hits" : [ ]   },   "aggregations" : {     "group_by_tags" : {       "doc_count_error_upper_bound" : 0,       "sum_other_doc_count" : 0,       "buckets" : [         {           "key" : "dev",           "doc_count" : 2         },         {           "key" : "java",           "doc_count" : 2         },         {           "key" : "bootstrap",           "doc_count" : 1         },         {           "key" : "spring",           "doc_count" : 1         }       ]     }   } }

2.4 加上搜索条件,计算每个tags下的商品数量

GET /book/_search {   "size": 0,    "query": {     "match": {       "description": "java程序员"     }   },    "aggs": {     "group_by_tags": {       "terms": { "field": "tags" }     }   } }

结果:

{   "took" : 70,   "timed_out" : false,   "_shards" : {     "total" : 1,     "successful" : 1,     "skipped" : 0,     "failed" : 0   },   "hits" : {     "total" : {       "value" : 2,       "relation" : "eq"     },     "max_score" : null,     "hits" : [ ]   },   "aggregations" : {     "group_by_tags" : {       "doc_count_error_upper_bound" : 0,       "sum_other_doc_count" : 0,       "buckets" : [         {           "key" : "java",           "doc_count" : 2         },         {           "key" : "dev",           "doc_count" : 1         },         {           "key" : "spring",           "doc_count" : 1         }       ]     }   } }

2.5 计算每个tag下的商品的平均价格

子聚合

GET /book/_search {   "size": 0,   "aggs": {     "group_by_tags": {       "terms": {         "field": "tags"       },       "aggs": {         "avg_price": {           "avg": {             "field": "price"           }         }       }     }   } }

结果:

{   "took" : 0,   "timed_out" : false,   "_shards" : {     "total" : 1,     "successful" : 1,     "skipped" : 0,     "failed" : 0   },   "hits" : {     "total" : {       "value" : 3,       "relation" : "eq"     },     "max_score" : null,     "hits" : [ ]   },   "aggregations" : {     "group_by_tags" : {       "doc_count_error_upper_bound" : 0,       "sum_other_doc_count" : 0,       "buckets" : [         {           "key" : "dev",           "doc_count" : 2,           "avg_price" : {             "value" : 53.599999999999994           }         },         {           "key" : "java",           "doc_count" : 2,           "avg_price" : {             "value" : 78.6           }         },         {           "key" : "bootstrap",           "doc_count" : 1,           "avg_price" : {             "value" : 38.6           }         },         {           "key" : "spring",           "doc_count" : 1,           "avg_price" : {             "value" : 88.6           }         }       ]     }   } }

2.6 计算每个tag下的商品的平均价格,按照平均价格降序排序

小技巧,如果是查询全部,match_all可省略

GET /book/_search {   "size": 0,   "aggs": {     "group_by_tags": {       "terms": {         "field": "tags",         "order": {           "avg_price": "desc"         }       },       "aggs": {         "avg_price": {           "avg": {             "field": "price"           }         }       }     }   } }

结果:

{   "took" : 4,   "timed_out" : false,   "_shards" : {     "total" : 1,     "successful" : 1,     "skipped" : 0,     "failed" : 0   },   "hits" : {     "total" : {       "value" : 3,       "relation" : "eq"     },     "max_score" : null,     "hits" : [ ]   },   "aggregations" : {     "group_by_tags" : {       "doc_count_error_upper_bound" : 0,       "sum_other_doc_count" : 0,       "buckets" : [         {           "key" : "spring",           "doc_count" : 1,           "avg_price" : {             "value" : 88.6           }         },         {           "key" : "java",           "doc_count" : 2,           "avg_price" : {             "value" : 78.6           }         },         {           "key" : "dev",           "doc_count" : 2,           "avg_price" : {             "value" : 53.599999999999994           }         },         {           "key" : "bootstrap",           "doc_count" : 1,           "avg_price" : {             "value" : 38.6           }         }       ]     }   } }

2.7 按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格

GET /book/_search {   "size": 0,   "aggs": {     "group_by_price": {       "range": {         "field": "price",         "ranges": [           {             "from": 0,             "to": 40           },           {             "from": 40,             "to": 60           },           {             "from": 60,             "to": 80           }         ]       },       "aggs": {         "group_by_tags": {           "terms": {             "field": "tags"           },           "aggs": {             "average_price": {               "avg": {                 "field": "price"               }             }           }         }       }     }   } }

结果(range区间包含from,不包含to;价格为88.6的文档不在任何区间内,所以没有出现在任何bucket中):

{   "took" : 5,   "timed_out" : false,   "_shards" : {     "total" : 1,     "successful" : 1,     "skipped" : 0,     "failed" : 0   },   "hits" : {     "total" : {       "value" : 3,       "relation" : "eq"     },     "max_score" : null,     "hits" : [ ]   },   "aggregations" : {     "group_by_price" : {       "buckets" : [         {           "key" : "0.0-40.0",           "from" : 0.0,           "to" : 40.0,           "doc_count" : 1,           "group_by_tags" : {             "doc_count_error_upper_bound" : 0,             "sum_other_doc_count" : 0,             "buckets" : [               {                 "key" : "bootstrap",                 "doc_count" : 1,                 "average_price" : {                   "value" : 38.6                 }               },               {                 "key" : "dev",                 "doc_count" : 1,                 "average_price" : {                   "value" : 38.6                 }               }             ]           }         },         {           "key" : "40.0-60.0",           "from" : 40.0,           "to" : 60.0,           "doc_count" : 0,           "group_by_tags" : {             "doc_count_error_upper_bound" : 0,             "sum_other_doc_count" : 0,             "buckets" : [ ]           }         },         {           "key" : "60.0-80.0",           "from" : 60.0,           "to" : 80.0,           "doc_count" : 1,           "group_by_tags" : {             "doc_count_error_upper_bound" : 0,             "sum_other_doc_count" : 0,             "buckets" : [               {                 "key" : "dev",                 "doc_count" : 1,                 "average_price" : {                   "value" : 68.6                 }               },               {                 "key" : "java",                 "doc_count" : 1,                 "average_price" : {                   "value" : 68.6                 }               }             ]           }         }       ]     }   } }

 

发表评论

相关文章