Elasticsearch range bucket aggregation based on doc_count
Asked Answered
C

1

10

I have an Elasticsearch aggregation query like this:

{
    "aggs": {
        "customer": {
            "aggs": {
                "Total_Sale": {
                    "sum": {
                        "field": "amount"
                    }
                }
            },
            "terms": {
                "field": "org",
                "size": 50000
            }
        }
    }
}

It returns bucket aggregation results like the following:

{
    "aggregations": {
        "customer": {
            "buckets": [
                {
                    "Total_Sale": { "value": 9999 },
                    "doc_count": 8,
                    "key": "cats"
                },
                {
                    "Total_Sale": { "value": 8888 },
                    "doc_count": 6,
                    "key": "tigers"
                },
                {
                    "Total_Sale": { "value": 444},
                    "doc_count": 5,
                    "key": "lions"
                },
                {
                    "Total_Sale": { "value": 555 },
                    "doc_count": 2,
                    "key": "wolves"
                }
           ]
       }
    }
}

I want another range bucket aggregation on top of this, based on doc_count. The final result I need is:

{
    "buckets": [    
        {               
            "Sum_of_Total_Sale": 555, // If I can form bucket, I can get this using sum_bucket. So, getting bucket is important.
            "Sum_of_doc_count": 2, 
            "doc_count": 1, 
            "key": "*-3",   
            "to": 3.0       
        },              
        {               
            "Sum_of_Total_Sale": 9332,
            "Sum_of_doc_count": 11,
            "doc_count": 2, 
            "from": 4.0,    
            "key": "4-6",   
            "to": 6.0       
        },                  
        {               
            "Sum_of_Total_Sale": 9999,
            "Sum_of_doc_count": 8,
            "doc_count": 1, 
            "from": 7.0,    
            "key": "7-*"    
        }                   
    ]                   
}  
  • Using a Bucket Selector Aggregation followed by a Sum Bucket Aggregation will not work, because there is more than one range key.
  • A Bucket Script Aggregation only performs calculations within a bucket.
  • Can I add a scripted field to each document that would help me create these buckets?
Crosstie answered 28/7, 2016 at 6:49 Comment(0)
E
5

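There's no aggregation that I know of that allows you to do this in one shot. However, there is one technique that I use from time to time to overcome this limitation. The idea is to repeat the same terms/sum aggregation once for each range you're interested in, and to add a bucket_selector pipeline aggregation to each copy so that it keeps only the buckets whose document count falls within that range. Sibling sum_bucket aggregations then add up Total_Sale and the document counts of the buckets that remain.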

POST index/_search
{
  "size": 0,
  "aggs": {
    "*-3": {
      "terms": {
        "field": "org",
        "size": 1000
      },
      "aggs": {
        "Total_Sale": {
          "sum": {
            "field": "amount"
          }
        },
        "*-3": {
          "bucket_selector": {
            "buckets_path": {
              "docCount": "_count"
            },
            "script": "params.docCount <= 3"
          }
        }
      }
    },
    "*-3_Total_Sales": {
      "sum_bucket": {
        "buckets_path": "*-3>Total_Sale"
      }
    },
    "*-3_Total_Docs": {
      "sum_bucket": {
        "buckets_path": "*-3>_count"
      }
    },
    "4-6": {
      "terms": {
        "field": "org",
        "size": 1000
      },
      "aggs": {
        "Total_Sale": {
          "sum": {
            "field": "amount"
          }
        },
        "4-6": {
          "bucket_selector": {
            "buckets_path": {
              "docCount": "_count"
            },
            "script": "params.docCount >= 4 && params.docCount <= 6"
          }
        }
      }
    },
    "4-6_Total_Sales": {
      "sum_bucket": {
        "buckets_path": "4-6>Total_Sale"
      }
    },
    "4-6_Total_Docs": {
      "sum_bucket": {
        "buckets_path": "4-6>_count"
      }
    },
    "7-*": {
      "terms": {
        "field": "org",
        "size": 1000
      },
      "aggs": {
        "Total_Sale": {
          "sum": {
            "field": "amount"
          }
        },
        "7-*": {
          "bucket_selector": {
            "buckets_path": {
              "docCount": "_count"
            },
            "script": "params.docCount >= 7"
          }
        }
      }
    },
    "7-*_Total_Sales": {
      "sum_bucket": {
        "buckets_path": "7-*>Total_Sale"
      }
    },
    "7_*_Total_Docs": {
      "sum_bucket": {
        "buckets_path": "7-*>_count"
      }
    }
  }
}

You'll get an answer that looks like this, which contains exactly the figures you're looking for in the xyz_Total_Sales and xyz_Total_Docs results:

  "aggregations": {
    "*-3": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "wolves",
          "doc_count": 2,
          "Total_Sale": {
            "value": 555
          }
        }
      ]
    },
    "7-*": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "cats",
          "doc_count": 8,
          "Total_Sale": {
            "value": 9999
          }
        }
      ]
    },
    "4-6": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "tigers",
          "doc_count": 6,
          "Total_Sale": {
            "value": 8888
          }
        },
        {
          "key": "lions",
          "doc_count": 5,
          "Total_Sale": {
            "value": 444
          }
        }
      ]
    },
    "*-3_Total_Sales": {
      "value": 555
    },
    "*-3_Total_Docs": {
      "value": 2
    },
    "4-6_Total_Sales": {
      "value": 9332
    },
    "4-6_Total_Docs": {
      "value": 11
    },
    "7-*_Total_Sales": {
      "value": 9999
    },
    "7_*_Total_Docs": {
      "value": 8
    }
  }
Entice answered 29/9, 2018 at 4:21 Comment(1)
How can this be visualized in Kibana? I imagine the x-axis would be [*-3_Total_Sales, 4-6_Total_Sales, 7-*_Total_Sales] and the y-axis would be their values. – Stearn

© 2022 - 2024 — McMap. All rights reserved.