Elasticsearch: order by length of array properties

Elasticsearch: order by length of array properties - java

I'm using Kibana and Elasticsearch. I have an index whose documents contain an array called "reviews"; each element has properties called positive_comment, negative_comment, reviewer_name and more.
There are some reviews that have the field positive_comment empty (not null, just empty).
I need to order by the length of the field positive_comment first, so that reviews with an empty positive_comment come last. The result should be the same as ordering by LEN() in SQL.
This is my query.
I also tried:
.value.size() in the script, and type "string", but neither gave the expected results.
{
"_source":[
"reviews.positive_comment"
],
"query":{
"bool":{
"filter":[
{
"term":{
"id":214
}
}
]
}
},
"sort":{
"_script":{
"script":"doc['reviews.positive_comment'].value.length()",
"type":"number",
"order":"asc"
}
}
}
This is my result:
{
"_source":{
"reviews":[
{
"positive_comment":"Great"
},
{
"positive_comment":"Really good product"
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":""
},
{
"positive_comment":"Awesome"
}
]
},
"sort":[
"0"
]
}

Elasticsearch doesn't support counting array elements, unless you use a script.
However, running a script for every matching document is going to degrade performance for every search query.
A better solution would be to count the values once, at index-time, and store the counts in dedicated fields (positive_comments_count, negative_comments_count, etc.) and use these fields for sorting.

Try the following. It worked for me on ES 5.6.3, so it should work on later versions too.
GET test/test/_search?filter_path=hits.hits
{
"query": {
"match_all": {}
},
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": "doc['reviews.positive_comment'].value.length()"
},
"order": "asc"
}
}
}
I have tested on ES 7.1 version too.
Mapping
PUT test
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"somefield": {
"type": "keyword"
},
"reviews": {
"properties": {
"positive_comment": {
"type": "keyword"
},
"item_id": {
"type": "double"
}
}
}
}
}
}
Query:
GET test/_search
{
"query": {
"match_all": {}
},
"_source": "reviews.positive_comment",
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": "doc['reviews.positive_comment'].value.length() % 100"
},
"order": "asc"
}
}
}
output
{
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "test5",
"_score" : null,
"_source" : {
"reviews" : [
{
"positive_comment" : ""
}
]
},
"sort" : [
0.0
]
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "test1",
"_score" : null,
"_source" : {
"reviews" : [
{
"positive_comment" : "Awesome"
}
]
},
"sort" : [
7.0
]
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "test3",
"_score" : null,
"_source" : {
"reviews" : [
{
"positive_comment" : "What a product"
}
]
},
"sort" : [
14.0
]
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "test2",
"_score" : null,
"_source" : {
"reviews" : [
{
"positive_comment" : "What a product.. amazing"
}
]
},
"sort" : [
24.0
]
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "test4",
"_score" : null,
"_source" : {
"reviews" : [
{
"positive_comment" : "Thats a great product.. "
}
]
},
"sort" : [
24.0
]
}
]
}
}

Related

MongoDB : fields are excluded when invoke multiple lookup stages

db={
"dashboard": [
{
"_id": "dashboard1",
"name": "test",
"user": 1
}
],
"templatefolders": [
{
"dashboardId": "dashboard1",
"folderId": "folder123",
"name": "folder",
"region": "XXX"
}
],
"folders": [
{
"_id": "folder123"
}
],
"user": [
{
"_id": 1,
"name": "alaa"
}
],
}
this is my function:
db.dashboard.aggregate([
{
"$lookup": {
"from": "templatefolders",
"localField": "_id",
"foreignField": "dashboardId",
"as": "joinDashboard"
}
},
{
"$lookup": {
"from": "folders",
"localField": "joinDashboard.folderId",
"foreignField": "_id",
"as": "joinDashboard.joinFolder"
}
},
])
Result :
[
{
"_id": "dashboard1",
"joinDashboard": {
"joinFolder": [
{
"_id": "folder123"
}
]
},
"name": "test",
"user": 1
}
]
[![enter image description here][1]][1]
Why are the fields name and region from the templatefolders collection excluded?
I want to know the reason for this behavior. I would rather not use $unwind because I have multiple collections with multiple reference relations.

Your second $lookup is overriding the joinDashboard key completely. Since you want joinFolder to be within joinDashboard, you can try nested lookups like this:
db.dashboard.aggregate([
{
$lookup: {
from: "templatefolders",
let: {
"boardId": "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$dashboardId",
"$$boardId"
]
}
}
},
{
$lookup: {
from: "folders",
let: {
"folderId": "$folderId"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$_id",
"$$folderId"
]
}
}
},
],
as: "joinFolder"
},
},
],
as: "joinDashboard"
}
}
])
MongoPlayground link.

elasticsearch search by special character

I have a set of the following phrases: [remix], [18+], etc. How can I make a search by one character, for example "[", to find all these variants ?
Right now I have the following analyzers config:
{
"analysis": {
"analyzer": {
"bigram_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"bigram_filter"
]
},
"full_text_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": [
"lowercase"
]
}
},
"filter": {
"bigram_filter": {
"type": "edge_ngram",
"max_gram": 2
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "ngram",
"min_gram": 3,
"max_gram": 3,
"token_chars": [
"letter",
"digit",
"symbol",
"punctuation"
]
}
}
}
}
Mapping occurs at the java entity level using the spring boot data elasticsearch starter

If I understand your problem correctly - you want to implement an autocomplete analyzer that will return any term that starts with [ or any other character. To do so you can create a custom analyzer using ngram autocomplete. Here is an example:
Here is the testing index:
PUT /testing-index-v3
{
"settings": {
"number_of_shards": 1,
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 15
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
},
"mappings": {
"properties": {
"term": {
"type": "text",
"analyzer": "autocomplete"
}
}
}
}
Here is the documents input:
POST /testing-index-v3/_doc
{
"term": "[+18]"
}
POST testing-index-v3/_doc
{
"term": "[remix]"
}
POST testing-index-v3/_doc
{
"term": "test"
}
And finally our search:
GET testing-index-v3/_search
{
"query": {
"match": {
"term": {
"query": "[remi",
"analyzer": "keyword",
"fuzziness": 0
}
}
}
}
As you can see, I chose the keyword tokenizer for the autocomplete analyzer. I'm using an edge_ngram filter with min_gram 1 and max_gram 15, which means each indexed term is split into prefix tokens like this:
input-query = i, in, inp, inpu, input, and so on, up to a prefix of 15 characters. This is wanted only at indexing time. In the query we specify the keyword analyzer as well - this analyzer is applied at search time, so the search input is matched exactly against the indexed prefix tokens. Here are some example searches and results:
GET testing-index-v3/_search
{
"query": {
"match": {
"term": {
"query": "[",
"analyzer": "keyword",
"fuzziness": 0
}
}
}
}
result:
"hits" : [
{
"_index" : "testing-index-v3",
"_type" : "_doc",
"_id" : "w5c_IHsBGGZ-oIJIi-6n",
"_score" : 0.7040055,
"_source" : {
"term" : "[remix]"
}
},
{
"_index" : "testing-index-v3",
"_type" : "_doc",
"_id" : "xJc_IHsBGGZ-oIJIju7m",
"_score" : 0.7040055,
"_source" : {
"term" : "[+18]"
}
}
]
GET testing-index-v3/_search
{
"query": {
"match": {
"term": {
"query": "[+",
"analyzer": "keyword",
"fuzziness": 0
}
}
}
}
result:
"hits" : [
{
"_index" : "testing-index-v3",
"_type" : "_doc",
"_id" : "xJc_IHsBGGZ-oIJIju7m",
"_score" : 0.7040055,
"_source" : {
"term" : "[+18]"
}
}
]
Hope this answer helps you. Good luck with your adventures with elasticsearch!

Elastic search - Java api to search on multiple fields not giving exact results

I am searching for my input keyword in multiple fields using the Java API's QueryBuilders.multiMatchQuery, but it is not giving me exact results. It returns seemingly random results that are not relevant to my search query.
Please find my code below.
SearchRequest contentSearchRequest = new SearchRequest(ATTACHMENT);
SearchSourceBuilder contentSearchSourceBuilder = new SearchSourceBuilder();
contentSearchRequest.types(TYPE);
MultiMatchQueryBuilder attachmentQB = QueryBuilders.multiMatchQuery(keyword, "attachment.content","catalog_keywords","product_keywords");
contentSearchSourceBuilder.query(attachmentQB);
contentSearchSourceBuilder.size(5);
Please find my mapping details :
PUT document_attachment
{
"settings": {
"analysis": {
"analyzer": {
"custom_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"char_filter": [
"html_strip"
],
"filter": [
"lowercase",
"asciifolding"
]
},
"product_catalog_keywords_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"char_filter": [
"html_strip"
],
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings" : {
"doc" : {
"properties" : {
"attachment" : {
"properties" : {
"content" : {
"type" : "text",
"analyzer": "custom_analyzer"
},
"content_length" : {
"type" : "long"
},
"content_type" : {
"type" : "text"
},
"language" : {
"type" : "text"
}
}
},
"fileContent" : {
"type" : "text"
},
"id": {
"type": "long"
},
"catalog_keywords" : {
"type" : "text",
"analyzer": "product_catalog_keywords_analyzer"
},
"product_keywords" : {
"type" : "text",
"analyzer": "product_catalog_keywords_analyzer"
},
}
}
}
}

"MapperParsingException[Analyzer [second] not found for field [Name]]"

I have created an index in Elasticsearch with the following settings:
{
"my_index" : {
"aliases" : { },
"mappings" : { },
"settings" : {
"index" : {
"creation_date" : "1461229073677",
"uuid" : "7-TECarfRs6XO8yZE7SeWA",
"number_of_replicas" : "1",
"number_of_shards" : "5",
"version" : {
"created" : "1040599"
},
"settings" : {
"analysis" : {
"analyzer" : {
"second" : {
"type" : "custom",
"filter" : [ "lowercase", "synonym" ],
"tokenizer" : "standard"
}
},
"filter" : {
"synonym" : {
"type" : "synonym",
"synonyms" : [ "i pad => ipad", "smart phone => smartphone" ]
}
}
}
}
}
},
"warmers" : { }
}
}
Now what I'm trying to do is set the mappings using the following code:
PutMapping putMapping = new PutMapping.Builder(
"my_index",
"my_index_type",
"{ \"properties\" : { \"Name\" : {\"type\" : \"string\", \"analyzer\" : \"second\"} } }"
).build();
JestResult result = client.execute(createIndex);
result = client.execute(putMapping);
EDIT
The code I'm using to create the index is:
CreateIndex createIndex = new CreateIndex.Builder(indexName)
.settings(
ImmutableSettings.builder()
.loadFromClasspath(
"settings.json"
).build().getAsMap()
).build();
JestResult result = client.execute(createIndex);
and the settings.json looks like this:
{
"settings": {
"analysis": {
"analyzer": {
"second": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"synonym"
]
}
},
"filter": {
"synonym" : {
"type" : "synonym",
"synonyms" : [
"i pad => ipad",
"smart phone => smartphone",
"i phone => iphone"
]
}
}
}
}
}
However I keep getting the following error:
"MapperParsingException[Analyzer [second] not found for field [message]]"
I am able to set the mapping if I remove the "analyzer". It seems that I end up with the "settings" section twice, and no matter how I structure the "settings.json" file I keep getting these two sections. I looked into the examples on the Jest page, but they didn't help me: https://github.com/searchbox-io/Jest/blob/master/jest/README.md
Any ideas guys?

The settings you're using are not properly defined, i.e. you have two nested settings sections. The index settings should look like this instead:
{
"my_index": {
"aliases": {},
"mappings": {},
"settings": {
"index": {
"number_of_replicas": "1",
"number_of_shards": "5"
},
"analysis": {
"analyzer": {
"second": {
"type": "custom",
"filter": [
"lowercase",
"synonym"
],
"tokenizer": "standard"
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"i pad => ipad",
"smart phone => smartphone"
]
}
}
}
},
"warmers": {}
}
}
UPDATE
Your settings.json file simply needs to contain the following:
{
"analysis": {
"analyzer": {
"second": {
"type": "custom",
"filter": [
"lowercase",
"synonym"
],
"tokenizer": "standard"
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"i pad => ipad",
"smart phone => smartphone"
]
}
}
}
}

Elasticsearch and mongodb, partial search not working

This is my location Collection in mongodb:
{ "_id" : ObjectId("5270d36f28f31fd8fa016441"), "stateName" : "A5", "cityName" : "ABCNEW2" }
{ "_id" : ObjectId("5270d37328f31fd8fa016442"), "stateName" : "A5", "cityName" : "ABC" }
{ "_id" : ObjectId("5270d37b28f31fd8fa016443"), "stateName" : "65", "cityName" : "ABCRW" }
I created one index using elastic Search:
POST /bwitter
{"index":
{ "number_of_shards": 1,
"analysis": {
"filter": {
"mynGram" : {"type": "nGram", "min_gram": 2, "max_gram": 10}
},
"analyzer": { "a1" : {
"type":"custom",
"tokenizer": "standard",
"filter": ["lowercase", "mynGram"]
}
}
}
}
}
I created one mapping using elastic search:
PUT /bwitter/bweet/_mapping
{
"bweet" : {
"index_analyzer" : "a1",
"search_analyzer" : "standard",
"properties" : {
"stateName": {"type":"string", "analyzer":"standard"},
"cityName" : {"type" : "string" }
}
}
}
Created river as follows:
PUT /_river/mongodb/_meta
{
"type": "mongodb",
"mongodb": {
"db": "rakeshdb",
"collection": "locations"
},
"index": {
"name": "locations",
"type": "bweet"
}
}
If I query GET /locations/_search?q=ABC, I get only one record (only the full word is matched; partial-word matching is not happening).
I have spent almost a whole day on this but have not been able to solve it. Where am I going wrong?

I guess that it should be:
PUT /_river/mongodb/_meta
{
"type": "mongodb",
"mongodb": {
"db": "rakeshdb",
"collection": "locations"
},
"index": {
"name": "bwitter",
"type": "bweet"
}
}

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Elasticsearch: order by length of array properties - java

Related

MongoDB : fields are excluded when invoke multiple lookup stages

elasticsearch search by special character

Elastic search - Java api to search on multiple fields not giving exact results

"MapperParsingException[Analyzer [second] not found for field [Name]]"

Elasticsearch and mongodb, partial search not working

Categories

Resources