elasticsearch search by special character - java

I have a set of phrases like the following: [remix], [18+], etc. How can I search by a single character, for example "[", to find all of these variants?
Right now I have the following analyzer config:
{
  "analysis": {
    "analyzer": {
      "bigram_analyzer": {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": [
          "lowercase",
          "bigram_filter"
        ]
      },
      "full_text_analyzer": {
        "type": "custom",
        "tokenizer": "ngram_tokenizer",
        "filter": [
          "lowercase"
        ]
      }
    },
    "filter": {
      "bigram_filter": {
        "type": "edge_ngram",
        "max_gram": 2
      }
    },
    "tokenizer": {
      "ngram_tokenizer": {
        "type": "ngram",
        "min_gram": 3,
        "max_gram": 3,
        "token_chars": [
          "letter",
          "digit",
          "symbol",
          "punctuation"
        ]
      }
    }
  }
}
Mapping is defined at the Java entity level using the Spring Boot Data Elasticsearch starter.
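For context: the ngram_tokenizer above has min_gram: 3, so a single-character query such as "[" never produces a matching token. You can check this with the _analyze API (a sketch; it assumes the settings above are applied to an index, here called my-index):

POST /my-index/_analyze
{
  "analyzer": "full_text_analyzer",
  "text": "[remix]"
}

This returns only 3-character grams ([re, rem, emi, mix and ix]), none of which match a lone "[".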

If I understand your problem correctly, you want to implement an autocomplete analyzer that returns any term starting with [ or any other character. To do so you can create a custom analyzer with an edge_ngram filter. Here is an example:
Here is the testing index:
PUT /testing-index-v3
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 15
        }
      },
      "analyzer": {
        "autocomplete": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "autocomplete_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "term": {
        "type": "text",
        "analyzer": "autocomplete"
      }
    }
  }
}
Here are the document inputs:
POST /testing-index-v3/_doc
{
  "term": "[+18]"
}

POST /testing-index-v3/_doc
{
  "term": "[remix]"
}

POST /testing-index-v3/_doc
{
  "term": "test"
}
And finally our search:
GET testing-index-v3/_search
{
  "query": {
    "match": {
      "term": {
        "query": "[remi",
        "analyzer": "keyword",
        "fuzziness": 0
      }
    }
  }
}
As you can see, I chose the keyword tokenizer for the autocomplete analyzer. The edge_ngram filter with min_gram: 1 and max_gram: 15 means that each indexed term is split into prefix tokens: input → i, in, inp, inpu, input, and so on, up to 15 characters. This is wanted only at index time. In the query we specify the keyword analyzer as well - that analyzer is applied at search time, so the query string is matched as a single, exact token.
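You can verify the tokens produced at index time with the _analyze API (a quick check against the test index):

POST /testing-index-v3/_analyze
{
  "analyzer": "autocomplete",
  "text": "[remix]"
}

This returns the prefix tokens [, [r, [re, [rem, [remi, [remix and [remix]. Here are some example searches and results: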
GET testing-index-v3/_search
{
  "query": {
    "match": {
      "term": {
        "query": "[",
        "analyzer": "keyword",
        "fuzziness": 0
      }
    }
  }
}
result:
"hits" : [
{
"_index" : "testing-index-v3",
"_type" : "_doc",
"_id" : "w5c_IHsBGGZ-oIJIi-6n",
"_score" : 0.7040055,
"_source" : {
"term" : "[remix]"
}
},
{
"_index" : "testing-index-v3",
"_type" : "_doc",
"_id" : "xJc_IHsBGGZ-oIJIju7m",
"_score" : 0.7040055,
"_source" : {
"term" : "[+18]"
}
}
]
GET testing-index-v3/_search
{
  "query": {
    "match": {
      "term": {
        "query": "[+",
        "analyzer": "keyword",
        "fuzziness": 0
      }
    }
  }
}
result:
"hits" : [
{
"_index" : "testing-index-v3",
"_type" : "_doc",
"_id" : "xJc_IHsBGGZ-oIJIju7m",
"_score" : 0.7040055,
"_source" : {
"term" : "[+18]"
}
}
]
Hope this answer helps you. Good luck with your adventures with Elasticsearch!

Related

Elasticsearch: order by length of array properties

I'm using Kibana and Elasticsearch. I have an index of objects with an array called "reviews" whose elements have properties called positive_comment, negative_comment, reviewer_name and more.
Some reviews have the field positive_comment empty (not null, just empty).
I need to order by the length of the field positive_comment, so that reviews with an empty positive_comment come last - the same ordering SQL would give with LEN().
This is my query. I also tried .value.size() in the script, and type "string", but got no results:
{
  "_source": [
    "reviews.positive_comment"
  ],
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "id": 214
          }
        }
      ]
    }
  },
  "sort": {
    "_script": {
      "script": "doc['reviews.positive_comment'].value.length()",
      "type": "number",
      "order": "asc"
    }
  }
}
This is my result:
{
  "_source": {
    "reviews": [
      { "positive_comment": "Great" },
      { "positive_comment": "Really good product" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "" },
      { "positive_comment": "Awesome" }
    ]
  },
  "sort": [
    "0"
  ]
}
Elasticsearch doesn't support counting array elements, unless you use a script.
However, running a script for every matching document is going to degrade performance for every search query.
A better solution would be to count the values once, at index time, store the counts in dedicated fields (positive_comments_count, negative_comments_count, etc.), and use these fields for sorting, as sketched below.
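A sketch of that approach (index name, field names and values are hypothetical): the ingesting application counts the non-empty comments once and stores the result alongside the array, and queries then sort on the plain numeric field:

POST /reviews-index/_doc/214
{
  "reviews": [
    { "positive_comment": "Great" },
    { "positive_comment": "" }
  ],
  "positive_comments_count": 1
}

GET /reviews-index/_search
{
  "sort": [
    { "positive_comments_count": "desc" }
  ]
}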
Try the following. It worked for me on ES 5.6.3, so it should work on higher versions too.
GET test/test/_search?filter_path=hits.hits
{
  "query": {
    "match_all": {}
  },
  "sort": {
    "_script": {
      "type": "number",
      "script": {
        "lang": "painless",
        "source": "doc['reviews.positive_comment'].value.length()"
      },
      "order": "asc"
    }
  }
}
I have tested this on ES 7.1 too.
Mapping:
PUT test
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "somefield": {
        "type": "keyword"
      },
      "reviews": {
        "properties": {
          "positive_comment": {
            "type": "keyword"
          },
          "item_id": {
            "type": "double"
          }
        }
      }
    }
  }
}
Query:
GET test/_search
{
  "query": {
    "match_all": {}
  },
  "_source": "reviews.positive_comment",
  "sort": {
    "_script": {
      "type": "number",
      "script": {
        "lang": "painless",
        "source": "doc['reviews.positive_comment'].value.length() % 100"
      },
      "order": "asc"
    }
  }
}
Output:
{
  "hits" : {
    "total" : {
      "value" : 5,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "test",
        "_type" : "_doc",
        "_id" : "test5",
        "_score" : null,
        "_source" : {
          "reviews" : [
            { "positive_comment" : "" }
          ]
        },
        "sort" : [ 0.0 ]
      },
      {
        "_index" : "test",
        "_type" : "_doc",
        "_id" : "test1",
        "_score" : null,
        "_source" : {
          "reviews" : [
            { "positive_comment" : "Awesome" }
          ]
        },
        "sort" : [ 7.0 ]
      },
      {
        "_index" : "test",
        "_type" : "_doc",
        "_id" : "test3",
        "_score" : null,
        "_source" : {
          "reviews" : [
            { "positive_comment" : "What a product" }
          ]
        },
        "sort" : [ 14.0 ]
      },
      {
        "_index" : "test",
        "_type" : "_doc",
        "_id" : "test2",
        "_score" : null,
        "_source" : {
          "reviews" : [
            { "positive_comment" : "What a product.. amazing" }
          ]
        },
        "sort" : [ 24.0 ]
      },
      {
        "_index" : "test",
        "_type" : "_doc",
        "_id" : "test4",
        "_score" : null,
        "_source" : {
          "reviews" : [
            { "positive_comment" : "Thats a great product.. " }
          ]
        },
        "sort" : [ 24.0 ]
      }
    ]
  }
}

how to use terms query in elasticsearch 7.x in this case

Elasticsearch version is 7.x.
Here is some nested data:
data1:
[{name:"tom"},{name:"jack"}]
data2:
[{name:"tom"},{name:"rose"}]
data3:
[{name:"tom"},{name:"rose3"}]
...
dataN:
[{name:"tom"},{name:"roseN"}]
When I use the terms query, I just want to match tom and jack, but I don't want to include rose...roseN:
query: {
  terms: { ["tom","jack"] }
}
This code does not work.
Adding a working example
Index Data:
PUT /65838516/_doc/1
{
  "names": [
    {
      "name": "tom"
    },
    {
      "name": "jack"
    }
  ]
}

PUT /65838516/_doc/2
{
  "names": [
    {
      "name": "tom"
    },
    {
      "name": "rose"
    }
  ]
}
Search Query:
{
  "query": {
    "bool": {
      "must": {
        "terms": {
          "names.name": [
            "tom",
            "jack"
          ]
        }
      },
      "must_not": {
        "match": {
          "names.name": "rose"
        }
      }
    }
  }
}
Search Result:
"hits": [
{
"_index": "65838516",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"names": [
{
"name": "tom"
},
{
"name": "jack"
}
]
}
}
]
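Note that terms matches a document if any element of the array matches, so without the must_not clause document 2 (tom + rose) would also be returned, since both documents contain "tom". A quick check (a sketch against the same index):

GET /65838516/_search
{
  "query": {
    "terms": {
      "names.name": ["tom", "jack"]
    }
  }
}

This returns both documents.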

Elasticsearch error: Custom Analyzer [custom_analyzer] failed to find tokenizer under name [my_tokenizer]

I am trying to map a field with my custom_analyzer and tokenizer, but I am getting an error.
This is the error I get from Kibana while mapping the fields:
Custom Analyzer [custom_analyzer] failed to find tokenizer under name [my_tokenizer]
Here are my mapping details:
PUT attach_local
{
  "settings": {
    "analysis": {
      "analyzer": {
        "custom_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "char_filter": [
            "html_strip"
          ],
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "tokenizer": {
    "my_tokenizer": {
      "type": "ngram",
      "min_gram": 3,
      "max_gram": 3,
      "token_chars": [
        "letter",
        "digit"
      ]
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "attachment": {
          "properties": {
            "content": {
              "type": "text",
              "analyzer": "custom_analyzer"
            },
            "content_length": {
              "type": "long"
            },
            "content_type": {
              "type": "text"
            },
            "language": {
              "type": "text"
            }
          }
        },
        "resume": {
          "type": "text"
        }
      }
    }
  }
}
It is very important to properly indent your JSON: you'd then see that your tokenizer is not located inside the analysis section where it belongs. Here is the correct definition:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "custom_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "char_filter": [
            "html_strip"
          ],
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 3,
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "attachment": {
          "properties": {
            "content": {
              "type": "text",
              "analyzer": "custom_analyzer"
            },
            "content_length": {
              "type": "long"
            },
            "content_type": {
              "type": "text"
            },
            "language": {
              "type": "text"
            }
          }
        },
        "resume": {
          "type": "text"
        }
      }
    }
  }
}
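Once the index is created with the corrected settings, you can sanity-check the tokenizer with the _analyze API (a quick sketch; the sample text is arbitrary):

POST /attach_local/_analyze
{
  "analyzer": "custom_analyzer",
  "text": "Hello42"
}

This should return the lowercased 3-character grams hel, ell, llo, lo4 and o42.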

"MapperParsingException[Analyzer [second] not found for field [Name]]"

I have created an index in Elasticsearch with the following settings:
{
  "my_index" : {
    "aliases" : { },
    "mappings" : { },
    "settings" : {
      "index" : {
        "creation_date" : "1461229073677",
        "uuid" : "7-TECarfRs6XO8yZE7SeWA",
        "number_of_replicas" : "1",
        "number_of_shards" : "5",
        "version" : {
          "created" : "1040599"
        },
        "settings" : {
          "analysis" : {
            "analyzer" : {
              "second" : {
                "type" : "custom",
                "filter" : [ "lowercase", "synonym" ],
                "tokenizer" : "standard"
              }
            },
            "filter" : {
              "synonym" : {
                "type" : "synonym",
                "synonyms" : [ "i pad => ipad", "smart phone => smartphone" ]
              }
            }
          }
        }
      }
    },
    "warmers" : { }
  }
}
Now what I'm trying to do is set the mappings using the following code:
PutMapping putMapping = new PutMapping.Builder(
    "my_index",
    "my_index_type",
    "{ \"properties\" : { \"Name\" : {\"type\" : \"string\", \"analyzer\" : \"second\"} } }"
).build();
JestResult result = client.execute(createIndex);
result = client.execute(putMapping);
EDIT
The code I'm using to create the index is:
CreateIndex createIndex = new CreateIndex.Builder(indexName)
    .settings(
        ImmutableSettings.builder()
            .loadFromClasspath("settings.json")
            .build()
            .getAsMap()
    ).build();
JestResult result = client.execute(createIndex);
and the settings.json looks like this:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "second": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "synonym"
          ]
        }
      },
      "filter": {
        "synonym": {
          "type": "synonym",
          "synonyms": [
            "i pad => ipad",
            "smart phone => smartphone",
            "i phone => iphone"
          ]
        }
      }
    }
  }
}
However I keep getting the following error:
"MapperParsingException[Analyzer [second] not found for field [Name]]"
I am able to set the mapping if I remove the "analyzer". It seems that I have the "settings" section twice, but no matter how I structure the settings.json file I keep ending up with these two sections. I looked at the examples on the Jest page (https://github.com/searchbox-io/Jest/blob/master/jest/README.md) but they didn't help me.
Any ideas, guys?
The settings you're using are not properly defined: you have two nested settings sections. The index settings should look like this instead:
{
  "my_index": {
    "aliases": {},
    "mappings": {},
    "settings": {
      "index": {
        "number_of_replicas": "1",
        "number_of_shards": "5"
      },
      "analysis": {
        "analyzer": {
          "second": {
            "type": "custom",
            "filter": [
              "lowercase",
              "synonym"
            ],
            "tokenizer": "standard"
          }
        },
        "filter": {
          "synonym": {
            "type": "synonym",
            "synonyms": [
              "i pad => ipad",
              "smart phone => smartphone"
            ]
          }
        }
      }
    },
    "warmers": {}
  }
}
UPDATE
Your settings.json file simply needs to contain the following:
{
  "analysis": {
    "analyzer": {
      "second": {
        "type": "custom",
        "filter": [
          "lowercase",
          "synonym"
        ],
        "tokenizer": "standard"
      }
    },
    "filter": {
      "synonym": {
        "type": "synonym",
        "synonyms": [
          "i pad => ipad",
          "smart phone => smartphone"
        ]
      }
    }
  }
}
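After recreating the index, you can verify that the analyzer and its synonyms are picked up (a sketch; on this 1.x cluster the _analyze API accepts the text as a query parameter):

GET /my_index/_analyze?analyzer=second&text=i pad

With the lowercase filter and the rule "i pad => ipad", this should produce the single token ipad.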

Elasticsearch and mongodb, partial search not working

This is my location collection in MongoDB:
{ "_id" : ObjectId("5270d36f28f31fd8fa016441"), "stateName" : "A5", "cityName" : "ABCNEW2" }
{ "_id" : ObjectId("5270d37328f31fd8fa016442"), "stateName" : "A5", "cityName" : "ABC" }
{ "_id" : ObjectId("5270d37b28f31fd8fa016443"), "stateName" : "65", "cityName" : "ABCRW" }
I created an index using Elasticsearch:
POST /bwitter
{
  "index": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "mynGram": { "type": "nGram", "min_gram": 2, "max_gram": 10 }
      },
      "analyzer": {
        "a1": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "mynGram"]
        }
      }
    }
  }
}
I created a mapping using Elasticsearch:
PUT /bwitter/bweet/_mapping
{
  "bweet": {
    "index_analyzer": "a1",
    "search_analyzer": "standard",
    "properties": {
      "stateName": { "type": "string", "analyzer": "standard" },
      "cityName": { "type": "string" }
    }
  }
}
I created the river as follows:
PUT /_river/mongodb/_meta
{
  "type": "mongodb",
  "mongodb": {
    "db": "rakeshdb",
    "collection": "locations"
  },
  "index": {
    "name": "locations",
    "type": "bweet"
  }
}
If I query GET /locations/_search?q=ABC, I get only one record: full-word search works, but partial-word search does not.
I spent almost a whole day on this but was not able to solve it. Where am I going wrong?
I guess that it should be:
PUT /_river/mongodb/_meta
{
  "type": "mongodb",
  "mongodb": {
    "db": "rakeshdb",
    "collection": "locations"
  },
  "index": {
    "name": "bwitter",
    "type": "bweet"
  }
}
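The original river wrote into an index named locations, which doesn't have the a1 analyzer defined, so documents were indexed without the nGram filter. Pointing the river at the existing bwitter index (where the analyzer and mapping live) should make partial matching work. A quick check after reindexing (a sketch):

GET /bwitter/_search?q=cityName:abc

Since cityName is indexed with a1 (2-10 character grams, lowercased), this should now match all three documents.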
