Elasticsearch search suggestion on array field with partial edge ngram completion

[ { "tags": [ "société générale", "consulting" ] }, { "tags": [ "big data", "big", "data"] }, { "tags": [ "data" ] }, { "tags": [ "data engineering" ] }, { "tags": [ "consulting and management of IT" ] } ]

Elasticsearch doesn't extract only the matched array item in the hits; it returns the entire array

The solution is a nested field combined with the inner_hits parameter in the query

Your documents

PUT /edge_suggestion/_bulk
{"create":{"_id":1}}
{"tags":["société générale","consulting"]}
{"create":{"_id":2}}
{"tags":["big data","big","data"]}
{"create":{"_id":3}}
{"tags":["data"]}
{"create":{"_id":4}}
{"tags":["data engineering"]}
{"create":{"_id":5}}
{"tags":["consulting and management of IT"]}

My source index is named edge_suggestion

Mapping of the new index, with an additional nested field for the tags

PUT /edge_suggestion_with_nested
{
    "mappings": {
        "properties": {
            "tags": {
                "type": "text"
            },
            "tags_nested": {
                "type": "nested",
                "properties": {
                    "tag": {
                        "type": "text",
                        "analyzer": "lowercase_asciifolding_edge_ngram_standard_analyzer"
                    }
                }
            }
        }
    },
    "settings": {
        "analysis": {
            "analyzer": {
                "lowercase_asciifolding_edge_ngram_standard_analyzer": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "asciifolding",
                        "edge_ngram_filter"
                    ]
                }
            },
            "filter": {
                "edge_ngram_filter": {
                    "type": "edge_ngram",
                    "min_gram": 1,
                    "max_gram": 11
                }
            }
        }
    }
}

Reindex request that transforms the array items into nested documents

POST _reindex
{
    "source": {
        "index": "edge_suggestion"
    },
    "dest": {
        "index": "edge_suggestion_with_nested"
    },
    "script": {
        "source": """
                // Copy every item of the flat tags field into its own nested object,
                // so that inner_hits can later return only the matching items.
                def tags = ctx._source[params['tags_field_name']];
                List nestedTags = new ArrayList();

                if (tags != null) {
                    // A field that holds a single value comes back as a scalar, not a list
                    List tagList;
                    if (tags instanceof List) {
                        tagList = (List) tags;
                    } else {
                        tagList = new ArrayList();
                        tagList.add(tags);
                    }

                    for (def tag : tagList) {
                        // Skip null entries: they would only create empty nested documents
                        if (tag != null) {
                            Map item = [params['nested_tag_field_name'] : tag];
                            nestedTags.add(item);
                        }
                    }
                }
                // Documents without tags get an empty nested array instead of failing the reindex
                ctx._source[params['tags_nested_field_name']] = nestedTags;
        """,
        "params": {
            "nested_tag_field_name": "tag",
            "tags_field_name": "tags",
            "tags_nested_field_name": "tags_nested"
        }
    }
}

Search query for "data", using the same analyzer as the document field, the "and" operator, and the inner_hits parameter

GET /edge_suggestion_with_nested/_search?filter_path=hits.hits.inner_hits.tags_nested.hits.hits._source
{
    "query": {
        "nested": {
            "path": "tags_nested",
            "query": {
                "match": {
                    "tags_nested.tag": {
                        "query": "data",
                        "operator": "and",
                        "analyzer": "lowercase_asciifolding_edge_ngram_standard_analyzer"
                    }
                }
            },
            "inner_hits": {}
        }
    }
}

Response

{
    "hits" : {
        "hits" : [
            {
                "inner_hits" : {
                    "tags_nested" : {
                        "hits" : {
                            "hits" : [
                                {
                                    "_source" : {
                                        "tag" : "data"
                                    }
                                }
                            ]
                        }
                    }
                }
            },
            {
                "inner_hits" : {
                    "tags_nested" : {
                        "hits" : {
                            "hits" : [
                                {
                                    "_source" : {
                                        "tag" : "data"
                                    }
                                },
                                {
                                    "_source" : {
                                        "tag" : "big data"
                                    }
                                }
                            ]
                        }
                    }
                }
            },
            {
                "inner_hits" : {
                    "tags_nested" : {
                        "hits" : {
                            "hits" : [
                                {
                                    "_source" : {
                                        "tag" : "data engineering"
                                    }
                                }
                            ]
                        }
                    }
                }
            }
        ]
    }
}

I use filter_path to keep only the significant parts of the response

The "data" tag appears more than once because several documents contain a nested item with this tag

Search query for "IT consulting"

GET /edge_suggestion_with_nested/_search?filter_path=hits.hits.inner_hits.tags_nested.hits.hits._source
{
    "query": {
        "nested": {
            "path": "tags_nested",
            "query": {
                "match": {
                    "tags_nested.tag": {
                        "query": "IT consulting",
                        "operator": "and",
                        "analyzer": "lowercase_asciifolding_edge_ngram_standard_analyzer"
                    }
                }
            },
            "inner_hits": {}
        }
    }
}

Response

{
    "hits" : {
        "hits" : [
            {
                "inner_hits" : {
                    "tags_nested" : {
                        "hits" : {
                            "hits" : [
                                {
                                    "_source" : {
                                        "tag" : "consulting and management of IT"
                                    }
                                }
                            ]
                        }
                    }
                }
            }
        ]
    }
}

Recommended topics

Hot tags