The Elasticsearch version is 7.x.
Here is some nested data:
data1:
[{name:"tom"},{name:"jack"}]
data2:
[{name:"tom"},{name:"rose"}]
data3:
[{name:"tom"},{name:"rose3"}]
...
dataN:
[{name:"tom"},{name:"roseN"}]
When I use the terms query, I only want to match tom and jack; I don't want to include rose...roseN.
query:{
terms:{["tom","jack"]}
}
This query does not work as expected.
Adding a working example
Index Data:
PUT /65838516/_doc/1
{
  "names": [
    {
      "name": "tom"
    },
    {
      "name": "jack"
    }
  ]
}
PUT /65838516/_doc/2
{
  "names": [
    {
      "name": "tom"
    },
    {
      "name": "rose"
    }
  ]
}
Search Query:
{
"query": {
"bool": {
"must": {
"terms": {
"names.name": [
"tom",
"jack"
]
}
},
"must_not": {
"match": {
"names.name": "rose"
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "65838516",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"names": [
{
"name": "tom"
},
{
"name": "jack"
}
]
}
}
]
Related
I am using Java to perform queries on Elasticsearch, via the ElasticSearchClient. Because the returned documents contain many large variables, I would like to retrieve only the relevant ones, but the variables in _source are nested.
Below is a sample index response (multiple indexes can be returned with same _source structure)
[
{
"_index": "kn-tas-20200630",
"_type": "_doc",
"_id": "1122334455",
"_score": null,
"_source": {
"variables": [
{
"rawValue": "DEFH",
"name": "MANAGER"
},
{
"rawValue": "ABCD",
"name": "EMPLOYEE"
},
{
"rawValue": "[{\"rowId\":102030,\"rowType\":\"SIM\"}]",
"name": "extData"
}
]
},
"sort": [
1665735632119
]
}
]
I would like to create a query using SearchSourceBuilder to query ES and only retrieve the following:
Get the rawValue by name (I provide MANAGER, I get "DEFH")
Get the rowType value (I provide extData + rowType, I get "SIM")
Below is my query:
{
"from": 0,
"size": 100,
"query": {
"bool": {
"must": [
{
"terms": {
"prcKey": [
"K-112"
],
"boost": 1.0
}
}
],
"must_not": [
{
"exists": {
"field": "endDate",
"boost": 1.0
}
},
{
"term": {
"personInCharge": {
"value": "ABC",
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"_source": {
"includes": [
"variables.name",
"variables.rawValue"
],
"excludes": []
},
"sort": [
{
"createTime": {
"order": "desc"
}
}
]
}
How can I fix my query? I tried using nested queries but without any luck.
db={
"dashboard": [
{
"_id": "dashboard1",
"name": "test",
"user": 1
}
],
"templatefolders": [
{
"dashboardId": "dashboard1",
"folderId": "folder123",
"name": "folder",
"region": "XXX"
}
],
"folders": [
{
"_id": "folder123"
}
],
"user": [
{
"_id": 1,
"name": "alaa"
}
],
}
this is my function:
// Two sequential lookups: dashboard -> templatefolders -> folders.
db.dashboard.aggregate([
{
// Stage 1: collect the templatefolders documents whose dashboardId equals
// this dashboard's _id into the array field "joinDashboard".
"$lookup": {
"from": "templatefolders",
"localField": "_id",
"foreignField": "dashboardId",
"as": "joinDashboard"
}
},
{
// Stage 2: look up folders whose _id matches the folderId values gathered
// in stage 1.
"$lookup": {
"from": "folders",
"localField": "joinDashboard.folderId",
"foreignField": "_id",
// NOTE(review): in the result shown below, joinDashboard comes back as an
// object holding only joinFolder (name and region are gone), i.e. this
// output path replaces the array produced by stage 1.
"as": "joinDashboard.joinFolder"
}
},
])
Result :
[
{
"_id": "dashboard1",
"joinDashboard": {
"joinFolder": [
{
"_id": "folder123"
}
]
},
"name": "test",
"user": 1
}
]
[![enter image description here][1]][1]
Why are the fields name and region from the templatefolders collection excluded?
I want to understand the reason for this behavior. I would prefer not to use $unwind because I have multiple collections with multiple reference relations.
Your second $lookup is overriding the joinDashboard key completely. Since you want joinFolder to be within joinDashboard, you can try nested lookups like this:
// Join each dashboard with its template folders and, inside that same join,
// attach the matching folder documents to every template folder, so the
// template folder's own fields are kept alongside joinFolder.
db.dashboard.aggregate([
{
$lookup: {
from: "templatefolders",
// Expose the dashboard's _id to the sub-pipeline as $$boardId.
let: {
"boardId": "$_id"
},
pipeline: [
{
// Keep only template folders that belong to the current dashboard.
// $expr is needed so the match can reference the $$boardId variable.
$match: {
$expr: {
$eq: [
"$dashboardId",
"$$boardId"
]
}
}
},
{
// Nested lookup: runs once per matching template folder and stores the
// result on that template folder as "joinFolder".
$lookup: {
from: "folders",
// Expose the template folder's folderId to the inner pipeline as $$folderId.
let: {
"folderId": "$folderId"
},
pipeline: [
{
// Keep only the folder whose _id equals the template folder's folderId.
$match: {
$expr: {
$eq: [
"$_id",
"$$folderId"
]
}
}
},
],
as: "joinFolder"
},
},
],
// Each dashboard receives the enriched template folders in "joinDashboard".
as: "joinDashboard"
}
}
])
MongoPlayground link.
I am trying to filter the records based on nested field and want only the matching object in that array to be shown as part of the record.
Below is the detailed explanation of my requirement.
So, I have Elasticsearch data like this:
[{
"basicInfo": {
"requestId": 123,
},
"managerInfo": {
"manager": "John",
},
"groupInfo": [
{
"id": "id1",
"name": "abc",
"status": "Approved"
},
{
"id": "id2",
"name": "abc",
"status": "Pending"
}
]
},
{
"basicInfo": {
"requestId": 233,
},
"managerInfo": {
"manager": "John Sr",
},
"groupInfo": [
{
"id": "id3",
"name": "abc",
"status": "Pending"
}
]
}
]
I want to filter the records only with groupInfo.status as Approved and basicInfo.requestId as 123, but my condition is I should only get the Approved record in the groupInfo and not the pending ones. So, the output I am expecting is:
{
"took": 23,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 3.0602708,
"hits": [
{
"_index": "my_index",
"_type": "request",
"_id": "123",
"_score": 3.0602708,
"_source": {
"basicInfo": {
"requestId": 123
},
"managerInfo": {
"manager": "John"
},
"groupInfo": [
{
"id": "id1",
"name": "abc",
"status": "Approved"
}
// No id2 here as it is in pending state
]
}
}
]
}
}
But instead I am able to achieve:
{
"took": 23,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 3.0602708,
"hits": [
{
"_index": "my_index",
"_type": "request",
"_id": "123",
"_score": 3.0602708,
"_source": {
"basicInfo": {
"requestId": 123
},
"managerInfo": {
"manager": "John"
},
"groupInfo": [
{
"id": "id1",
"name": "abc",
"status": "Approved"
},
{
"id": "id2",
"name": "abc",
"status": "Pending"
}
]
}
}
]
}
}
This is the query I am using:
{
"query": {
"bool": {
"must": [
{
"match": {
"basicInfo.requestId": "123"
}
},
{
"nested": {
"path": "groupInfo",
"query": {
"bool": {
"must": [
{
"term": {
"groupInfo.status": "Approved"
}
}
]
}
}
}
}
]
}
}
}
So, my first question is: is what I am expecting even possible? Can we filter the result so that only the matching array elements are returned?
If yes, how can we do it?
Thanks in advance.
Maybe you are looking for Inner Hits.
In many cases, it’s very useful to know which inner nested objects (in
the case of nested) or children/parent documents (in the case of
parent/child) caused certain information to be returned. The inner
hits feature can be used for this. This feature returns per search hit
in the search response additional nested hits that caused a search hit
to match in a different scope.
{
"query": {
"bool": {
"must": [
{
"match": {
"basicInfo.requestId": "123"
}
},
{
"nested": {
"path": "groupInfo",
"query": {
"bool": {
"must": [
{
"term": {
"groupInfo.status": "Approved"
}
}
]
}
},
"inner_hits":{}
}
}
]
}
}
}
I have a problem. I have a document:
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.10536051,
"hits": [
{
"_index": ...,
"_type": "_doc",
"_id": ...,
"_score": 0.10536051,
"_source": {
...
"testProperty": ["asd-asd", "sdf-sdf"]
}
}
]
}
}
I need to build a query to find documents where testProperty doesn't contain any element from the array I give.
I tried something like
{
"query":{
"bool":{
"must": {
...
},
"must_not":[
...
{
"terms": {
"testProperty": [
"qwe-qwe",
"asd-asd"
]
}
}
]
}
}
}
and it doesn't work. Do you have any idea how to do this?
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"testProperty": {
"type": "keyword"
}
}
}
}
Index Data:
{
"testProperty": "sdf-sdf"
}
{
"testProperty": "asd-asd"
}
Search Query:
{
"query": {
"bool": {
"must_not": {
"terms": {
"testProperty": [
"qwe-qwe",
"asd-asd"
]
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "66195355",
"_type": "_doc",
"_id": "2",
"_score": 0.0,
"_source": {
"testProperty": "sdf-sdf"
}
}
]
I want to highlight only the words I searched for in the query, not their synonyms, but I still want Elasticsearch to return the documents that matched through a synonym. Here is an example.
PUT /my_test_index/
{
"settings": {
"analysis": {
"filter": {
"native_synonym": {
"type": "synonym",
"ignore_case": true,
"expand": true,
"synonyms": [
"apple,fruit"
]
}
},
"analyzer": {
"test_analyzer": {
"tokenizer": "whitespace",
"filter": [
"native_synonym"
]
}
}
}
},
"mappings": {
"properties": {
"desc": {
"type": "text",
"analyzer": "test_analyzer"
}
}
}
}
POST /my_test_index/_doc
{
"desc": "apple"
}
POST /my_test_index/_doc
{
"desc": "fruit"
}
GET /my_test_index/_search
{
"query": {
"match": {
"desc": "apple"
}
},
"highlight": {
"fields": {
"desc": {}
}
}
}
However, Elasticsearch highlights both fruit and apple, while I only want apple to be highlighted.
Does anyone know how to solve this? Thanks in advance :)
"hits": [
{
"_index": "my_test_index",
"_type": "_doc",
"_id": "RMyZrXAB7JsJEwsbVF33",
"_score": 0.29171452,
"_source": {
"desc": "apple"
},
"highlight": {
"desc": [
"<em>apple</em>"
]
}
},
{
"_index": "my_test_index",
"_type": "_doc",
"_id": "RcyarXAB7JsJEwsboF2V",
"_score": 0.29171452,
"_source": {
"desc": "fruit"
},
"highlight": {
"desc": [
"<em>fruit</em>"
]
}
}
]
You can add a highlight query that behaves differently from your actual search query. All you need then is a field indexed without the synonyms, and you should be able to get what you want:
PUT /my_test_index/
{
"settings": {
"analysis": {
"filter": {
"native_synonym": {
"type": "synonym",
"ignore_case": true,
"expand": true,
"synonyms": [
"apple,fruit"
]
}
},
"analyzer": {
"test_analyzer": {
"tokenizer": "whitespace",
"filter": [
"native_synonym"
]
}
}
}
},
"mappings": {
"properties": {
"desc": {
"type": "text",
"analyzer": "test_analyzer",
"fields": {
"raw": {
"type": "text",
"analyzer": "whitespace"
}
}
}
}
}
}
GET /my_test_index/_search
{
"query": {
"match": {
"desc": "apple"
}
},
"highlight": {
"fields": {
"desc.raw": {
"highlight_query": {
"match": {
"desc.raw": "apple"
}
}
}
}
}
}