I have a search page which contains two search result types: summary result and concrete result.
- Summary result page contains top 3 result per category (top hits)
- Concrete result page contains all result for a selected category.
To obtain the Summary page I use the request:
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Query(q =>
q.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)
.Size(10)
.Aggregations(aggs => aggs.TopHits("top_tag_hits", t => t.Size(3)))))
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1));
var elasticResult = _elasticClient.Search<ElasticType>(_ => searchDescriptor);
And I get result, for example
{
"aggregations": {
"category": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "category1",
"doc_count": 40,
"top_tag_hits": {
"hits": {
"total": 40,
"max_score": 5.4,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 5.4,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 3 // FAIL!
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 2
}
}]
}
}
}]
}
}
}
So i get few hits with the same _score
.
To obtain the concrete result (by category) page I use the request:
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Size(perPage <= 0 ? 100 : perPage)
.From(page * perPage)
.Query(q => q
.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1)
.Field(f => f.Category))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(searchRequest.Query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
&& q.Term(t => t.Field(f => f.Category).Value(searchRequest.Category))
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1))
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)));
And the result something like this:
{
"hits": {
"total": 40,
"max_score": 7.816723,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 7.816723,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514713,
"_source": {
"id": 2
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514709,
"_source": {
"id": 3
}
}]
}
}
And so in the second case, for a specific category I get the _score with great precision and elastic can easily sort the results correctly. But in the case of aggregation there are results with the same _score
, and in this case, the sorting is not clear how it works.
Can someone direct me to the right path how to solve this problem? or how can I achieve the same order in the results? Maybe I can increase the accuracy for the aggregated results?
I use elasticsearch server version "5.3.0" and NEST library version "5.0.0".
Update: Native query for aggregation request:
{
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 3
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": "sparta",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1"
],
"zero_terms_query": "all"
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}
Native query for concrete request:
{
"from": 0,
"size": 100,
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category"
}
}
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
},
{
"term": {
"category": {
"value": "category1"
}
}
}
]
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}
Also i use next mapping for creating index:
var descriptor = new CreateIndexDescriptor(indexName)
.Mappings(ms => ms
.Map<ElasticType>(m => m
.Properties(ps => ps
.Keyword(s => s.Name(ecp => ecp.Title))
.Text(s => s.Name(ecp => ecp.Content1))
.Text(s => s.Name(ecp => ecp.Content2))
.Text(s => s.Name(ecp => ecp.Content3))
.Date(s => s.Name(ecp => ecp.Date))
.Number(s => s.Name(ecp => ecp.LanguageId).Type(NumberType.Integer))
.Keyword(s => s.Name(ecp => ecp.Category))
.Text(s => s.Name(ecp => ecp.PreviewImageUrl).Index(false))
.Text(s => s.Name(ecp => ecp.OptionalContent).Index(false))
.Text(s => s.Name(ecp => ecp.Url).Index(false)))));
_elasticClient.CreateIndex(indexName, _ => descriptor);
_score
. – Houseclean_score
then by some other field e.g.id
– Justice_score
and then byid
/date
. But in the case of a request for a specific category I get a different_score
, and sort does not need anything - all right. But in the case of aggregation I receive the same_score
for some results, the order can be the same as in the case of a request for a specific category, but maybe not. And it's not clear whether i need to sort at all. – Houseclean.Field(f => f.ContentName))
its my misprint, i updated this row too. – Houseclean