how to use terms query in elasticsearch 7.x in this case - java

The Elasticsearch version is 7.x.
Here is some nested data:
data1:
[{name:"tom"},{name:"jack"}]
data2:
[{name:"tom"},{name:"rose"}]
data3:
[{name:"tom"},{name:"rose3"}]
...
dataN:
[{name:"tom"},{name:"roseN"}]
When I use the terms query, I only want to match tom and jack; I don't want the results to include documents that contain rose ... roseN.
query:{
terms:{["tom","jack"]}
}
This query does not return the result I expect.

Adding a working example
Index Data:
PUT /_doc/1
{
"names": [
{
"name": "tom"
},
{
"name": "jack"
}
]
}
PUT /_doc/2
{
"names": [
{
"name": "tom"
},
{
"name": "rose"
}
]
}
Search Query:
{
"query": {
"bool": {
"must": {
"terms": {
"names.name": [
"tom",
"jack"
]
}
},
"must_not": {
"match": {
"names.name": "rose"
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "65838516",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"names": [
{
"name": "tom"
},
{
"name": "jack"
}
]
}
}
]

Related

Searching _source and returning only needed fields using Java

I am using Java to perform queries on Elasticsearch, via the ElasticSearchClient. As there are big variables returned, I would like to only retrieve the ones that are relevant but the variables in _source are nested.
Below is a sample index response (multiple indexes can be returned with same _source structure)
[
{
"_index": "kn-tas-20200630",
"_type": "_doc",
"_id": "1122334455",
"_score": null,
"_source": {
"variables": [
{
"rawValue": "DEFH",
"name": "MANAGER"
},
{
"rawValue": "ABCD",
"name": "EMPLOYEE"
},
{
"rawValue": "[{\"rowId\":102030,\"rowType\":\"SIM\"}]",
"name": "extData"
}
]
},
"sort": [
1665735632119
]
}
]
I would like to create a query using SearchSourceBuilder to query ES and only retrieve the following:
Get the rawValue by name (I provide MANAGER, I get "DEFH")
Get the rowType value (I provide extData + rowType, I get "SIM")
Below is my query:
{
"from": 0,
"size": 100,
"query": {
"bool": {
"must": [
{
"terms": {
"prcKey": [
"K-112"
],
"boost": 1.0
}
}
],
"must_not": [
{
"exists": {
"field": "endDate",
"boost": 1.0
}
},
{
"term": {
"personInCharge": {
"value": "ABC",
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"_source": {
"includes": [
"variables.name",
"variables.rawValue"
],
"excludes": []
},
"sort": [
{
"createTime": {
"order": "desc"
}
}
]
}
How can I fix my query? I tried using nested queries but without any luck.

MongoDB: fields are excluded when invoking multiple $lookup stages

db={
"dashboard": [
{
"_id": "dashboard1",
"name": "test",
"user": 1
}
],
"templatefolders": [
{
"dashboardId": "dashboard1",
"folderId": "folder123",
"name": "folder",
"region": "XXX"
}
],
"folders": [
{
"_id": "folder123"
}
],
"user": [
{
"_id": 1,
"name": "alaa"
}
],
}
This is my aggregation pipeline:
db.dashboard.aggregate([
{
"$lookup": {
"from": "templatefolders",
"localField": "_id",
"foreignField": "dashboardId",
"as": "joinDashboard"
}
},
{
"$lookup": {
"from": "folders",
"localField": "joinDashboard.folderId",
"foreignField": "_id",
"as": "joinDashboard.joinFolder"
}
},
])
Result :
[
{
"_id": "dashboard1",
"joinDashboard": {
"joinFolder": [
{
"_id": "folder123"
}
]
},
"name": "test",
"user": 1
}
]
[![enter image description here][1]][1]
Why are the fields name and region from the templatefolders collection excluded?
I want to understand why this happens. I would rather not use $unwind because I have multiple collections with multiple reference relations.
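Your second $lookup is overriding the joinDashboard key completely: its "as" path, joinDashboard.joinFolder, replaces the joinDashboard array produced by the first $lookup with an object that contains only joinFolder. Since you want joinFolder to be within joinDashboard, you can try nested lookups like this: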
db.dashboard.aggregate([
{
$lookup: {
from: "templatefolders",
let: {
"boardId": "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$dashboardId",
"$$boardId"
]
}
}
},
{
$lookup: {
from: "folders",
let: {
"folderId": "$folderId"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$_id",
"$$folderId"
]
}
}
},
],
as: "joinFolder"
},
},
],
as: "joinDashboard"
}
}
])
MongoPlayground link.

Elasticsearch: Filter the records based on nested field with nested field containing only the filtered object

I am trying to filter the records based on nested field and want only the matching object in that array to be shown as part of the record.
Below is the detailed explanation of my requirement.
So, I have Elasticsearch data like this:
[{
"basicInfo": {
"requestId": 123,
},
"managerInfo": {
"manager": "John",
},
"groupInfo": [
{
"id": "id1",
"name": "abc",
"status": "Approved"
},
{
"id": "id2",
"name": "abc",
"status": "Pending"
}
]
},
{
"basicInfo": {
"requestId": 233,
},
"managerInfo": {
"manager": "John Sr",
},
"groupInfo": [
{
"id": "id3",
"name": "abc",
"status": "Pending"
}
]
}
]
I want to return only the records where groupInfo.status is Approved and basicInfo.requestId is 123, and within each returned record the groupInfo array should contain only the Approved entries, not the Pending ones. So, the output I am expecting is:
{
"took": 23,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 3.0602708,
"hits": [
{
"_index": "my_index",
"_type": "request",
"_id": "123",
"_score": 3.0602708,
"_source": {
"basicInfo": {
"requestId": 123
},
"managerInfo": {
"manager": "John"
},
"groupInfo": [
{
"id": "id1",
"name": "abc",
"status": "Approved"
}
// No id2 here as it is in pending state
]
}
}
]
}
}
But instead I am able to achieve:
{
"took": 23,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 3.0602708,
"hits": [
{
"_index": "my_index",
"_type": "request",
"_id": "123",
"_score": 3.0602708,
"_source": {
"basicInfo": {
"requestId": 123
},
"managerInfo": {
"manager": "John"
},
"groupInfo": [
{
"id": "id1",
"name": "abc",
"status": "Approved"
},
{
"id": "id2",
"name": "abc",
"status": "Pending"
}
]
}
}
]
}
}
This is the query I am using:
{
"query": {
"bool": {
"must": [
{
"match": {
"basicInfo.requestId": "123"
}
},
{
"nested": {
"path": "groupInfo",
"query": {
"bool": {
"must": [
{
"term": {
"groupInfo.status": "Approved"
}
}
]
}
}
}
}
]
}
}
}
So, my first question is: is what I am expecting even possible? Can we filter the result so that only the matching objects of the nested array are returned?
If yes, how can we do it?
Thanks in advance.
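Maybe you are looking for Inner Hits. Note that the matching nested objects are returned in a separate inner_hits section of each search hit; the _source of the hit itself is not filtered.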
In many cases, it’s very useful to know which inner nested objects (in
the case of nested) or children/parent documents (in the case of
parent/child) caused certain information to be returned. The inner
hits feature can be used for this. This feature returns per search hit
in the search response additional nested hits that caused a search hit
to match in a different scope.
{
"query": {
"bool": {
"must": [
{
"match": {
"basicInfo.requestId": "123"
}
},
{
"nested": {
"path": "groupInfo",
"query": {
"bool": {
"must": [
{
"term": {
"groupInfo.status": "Approved"
}
}
]
}
},
"inner_hits":{}
}
}
]
}
}
}

Search documents with property (array) does not contain any elements from other array

I have a problem. I have a document:
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.10536051,
"hits": [
{
"_index": ...,
"_type": "_doc",
"_id": ...,
"_score": 0.10536051,
"_source": {
...
"testProperty": ["asd-asd", "sdf-sdf"]
}
}
]
}
}
I need to build a query to find documents where testProperty doesn't contain any element from the array I give.
I tried something like
{
"query":{
"bool":{
"must": {
...
},
"must_not":[
...
{
"terms": {
"testProperty": [
"qwe-qwe",
"asd-asd"
]
}
}
]
}
}
}
and it doesn't work. Do you have any idea how to do this?
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"testProperty": {
"type": "keyword"
}
}
}
}
Index Data:
{
"testProperty": "sdf-sdf"
}
{
"testProperty": "asd-asd"
}
Search Query:
{
"query": {
"bool": {
"must_not": {
"terms": {
"testProperty": [
"qwe-qwe",
"asd-asd"
]
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "66195355",
"_type": "_doc",
"_id": "2",
"_score": 0.0,
"_source": {
"testProperty": "sdf-sdf"
}
}
]

How to prevent Elasticsearch from highlighting synonyms?

I only want to highlight the words I actually searched for in the query, not their synonyms, but I still want Elasticsearch to return the documents that match through a synonym. Here is an example.
PUT /my_test_index/
{
"settings": {
"analysis": {
"filter": {
"native_synonym": {
"type": "synonym",
"ignore_case": true,
"expand": true,
"synonyms": [
"apple,fruit"
]
}
},
"analyzer": {
"test_analyzer": {
"tokenizer": "whitespace",
"filter": [
"native_synonym"
]
}
}
}
},
"mappings": {
"properties": {
"desc": {
"type": "text",
"analyzer": "test_analyzer"
}
}
}
}
POST /my_test_index/_doc
{
"desc": "apple"
}
POST /my_test_index/_doc
{
"desc": "fruit"
}
GET /my_test_index/_search
{
"query": {
"match": {
"desc": "apple"
}
},
"highlight": {
"fields": {
"desc": {}
}
}
}
However, Elasticsearch highlights both fruit and apple, while I only want apple to be highlighted.
Does anyone know how to solve this? Thanks in advance :)
"hits": [
{
"_index": "my_test_index",
"_type": "_doc",
"_id": "RMyZrXAB7JsJEwsbVF33",
"_score": 0.29171452,
"_source": {
"desc": "apple"
},
"highlight": {
"desc": [
"<em>apple</em>"
]
}
},
{
"_index": "my_test_index",
"_type": "_doc",
"_id": "RcyarXAB7JsJEwsboF2V",
"_score": 0.29171452,
"_source": {
"desc": "fruit"
},
"highlight": {
"desc": [
"<em>fruit</em>"
]
}
}
]
You can add a highlight query that behaves differently from your actual search query. All you need then is a field indexed without the synonyms, and you should be able to get what you want:
PUT /my_test_index/
{
"settings": {
"analysis": {
"filter": {
"native_synonym": {
"type": "synonym",
"ignore_case": true,
"expand": true,
"synonyms": [
"apple,fruit"
]
}
},
"analyzer": {
"test_analyzer": {
"tokenizer": "whitespace",
"filter": [
"native_synonym"
]
}
}
}
},
"mappings": {
"properties": {
"desc": {
"type": "text",
"analyzer": "test_analyzer",
"fields": {
"raw": {
"type": "text",
"analyzer": "whitespace"
}
}
}
}
}
}
GET /my_test_index/_search
{
"query": {
"match": {
"desc": "apple"
}
},
"highlight": {
"fields": {
"desc.raw": {
"highlight_query": {
"match": {
"desc.raw": "apple"
}
}
}
}
}
}

Categories