summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorMartijn van Groningen <martijn.v.groningen@gmail.com>2017-06-09 12:48:41 +0200
committerMartijn van Groningen <martijn.v.groningen@gmail.com>2017-06-09 14:57:11 +0200
commitc7ae27d57f1ddea0841c5a484a34e9765ce9f014 (patch)
tree760c62e580f0a6601b692395cbeb36b3bad8da5e /core
parent87d19b21c7515352f7dce4097aca88033130cc79 (diff)
nested: In case of a single type the _id field should be added to the nested document instead of _uid field.
When `index.mapping.single_type` is `true` the `_uid` field is not used and instead `_id` field is used. Prior to this change nested documents would in this case still use the `_uid` field to mark to what root document they belong to. In case of deleting documents this could lead to only the root Lucene document to be deleted and not the nested Lucene documents. This broke the docid block ordering the block join relies on in order to work correctly and thus causing the `nested` query, `nested` aggregation, nested sorting and nested inner hits to either fail or yield incorrect results. This bug only manifests in 6.0.0-ALPHA2 release and snaphots (5.5.0-SNAPSHOT, 5.6.0-SNAPSHOT, 6.0.0-SNAPSHOT).
Diffstat (limited to 'core')
-rw-r--r--core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java32
-rw-r--r--core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java5
-rw-r--r--core/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java103
-rw-r--r--core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java8
4 files changed, 126 insertions, 22 deletions
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
index 489f4702bc..c2de26c96b 100644
--- a/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
@@ -424,15 +424,33 @@ final class DocumentParser {
context = context.createNestedContext(mapper.fullPath());
ParseContext.Document nestedDoc = context.doc();
ParseContext.Document parentDoc = nestedDoc.getParent();
- // pre add the uid field if possible (id was already provided)
- IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
- if (uidField != null) {
- // we don't need to add it as a full uid field in nested docs, since we don't need versioning
- // we also rely on this for UidField#loadVersion
- // this is a deeply nested field
- nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
+ // We need to add the uid or id to this nested Lucene document too,
+ // If we do not do this then when a document gets deleted only the root Lucene document gets deleted and
+ // not the nested Lucene documents! Besides the fact that we would have zombie Lucene documents, the ordering of
+ // documents inside the Lucene index (document blocks) will be incorrect, as nested documents of different root
+ // documents are then aligned with other root documents. This will lead tothe nested query, sorting, aggregations
+ // and inner hits to fail or yield incorrect results.
+ if (context.mapperService().getIndexSettings().isSingleType()) {
+ IndexableField idField = parentDoc.getField(IdFieldMapper.NAME);
+ if (idField != null) {
+ // We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
+ // delete it when the root document is deleted too.
+ nestedDoc.add(new Field(IdFieldMapper.NAME, idField.stringValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
+ } else {
+ throw new IllegalStateException("The root document of a nested document should have an id field");
+ }
+ } else {
+ IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
+ if (uidField != null) {
+ /// We just need to store the uid as indexed field, so that IndexWriter#deleteDocuments(term) can then
+ // delete it when the root document is deleted too.
+ nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
+ } else {
+ throw new IllegalStateException("The root document of a nested document should have an uid field");
+ }
}
+
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java
index a9a765f1c3..813a546aae 100644
--- a/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java
@@ -52,6 +52,7 @@ public class IdFieldMapper extends MetadataFieldMapper {
public static final String NAME = IdFieldMapper.NAME;
public static final MappedFieldType FIELD_TYPE = new IdFieldType();
+ public static final MappedFieldType NESTED_FIELD_TYPE;
static {
FIELD_TYPE.setTokenized(false);
@@ -62,6 +63,10 @@ public class IdFieldMapper extends MetadataFieldMapper {
FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
FIELD_TYPE.setName(NAME);
FIELD_TYPE.freeze();
+
+ NESTED_FIELD_TYPE = FIELD_TYPE.clone();
+ NESTED_FIELD_TYPE.setStored(false);
+ NESTED_FIELD_TYPE.freeze();
}
}
diff --git a/core/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/core/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
index 49864768ed..d3d099672b 100644
--- a/core/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
+++ b/core/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
@@ -25,6 +25,7 @@ import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
@@ -148,6 +149,92 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
e.getMessage());
}
+ public void testNestedHaveIdAndTypeFields() throws Exception {
+ DocumentMapperParser mapperParser1 = createIndex("index1", Settings.builder()
+ .put("index.mapping.single_type", false).build()
+ ).mapperService().documentMapperParser();
+ DocumentMapperParser mapperParser2 = createIndex("index2", Settings.builder()
+ .put("index.mapping.single_type", true).build()
+ ).mapperService().documentMapperParser();
+
+ XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties");
+ {
+ mapping.startObject("foo");
+ mapping.field("type", "nested");
+ {
+ mapping.startObject("properties");
+ {
+
+ mapping.startObject("bar");
+ mapping.field("type", "keyword");
+ mapping.endObject();
+ }
+ mapping.endObject();
+ }
+ mapping.endObject();
+ }
+ {
+ mapping.startObject("baz");
+ mapping.field("type", "keyword");
+ mapping.endObject();
+ }
+ mapping.endObject().endObject().endObject();
+ DocumentMapper mapper1 = mapperParser1.parse("type", new CompressedXContent(mapping.string()));
+ DocumentMapper mapper2 = mapperParser2.parse("type", new CompressedXContent(mapping.string()));
+
+ XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
+ {
+ doc.startArray("foo");
+ {
+ doc.startObject();
+ doc.field("bar", "value1");
+ doc.endObject();
+ }
+ doc.endArray();
+ doc.field("baz", "value2");
+ }
+ doc.endObject();
+
+ // Verify in the case where multiple types are allowed that the _uid field is added to nested documents:
+ ParsedDocument result = mapper1.parse(SourceToParse.source("index1", "type", "1", doc.bytes(), XContentType.JSON));
+ assertEquals(2, result.docs().size());
+ // Nested document:
+ assertNull(result.docs().get(0).getField(IdFieldMapper.NAME));
+ assertNotNull(result.docs().get(0).getField(UidFieldMapper.NAME));
+ assertEquals("type#1", result.docs().get(0).getField(UidFieldMapper.NAME).stringValue());
+ assertEquals(UidFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(UidFieldMapper.NAME).fieldType());
+ assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
+ assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
+ assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
+ // Root document:
+ assertNull(result.docs().get(1).getField(IdFieldMapper.NAME));
+ assertNotNull(result.docs().get(1).getField(UidFieldMapper.NAME));
+ assertEquals("type#1", result.docs().get(1).getField(UidFieldMapper.NAME).stringValue());
+ assertEquals(UidFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(UidFieldMapper.NAME).fieldType());
+ assertNotNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
+ assertEquals("type", result.docs().get(1).getField(TypeFieldMapper.NAME).stringValue());
+ assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
+
+ // Verify in the case where only a single type is allowed that the _id field is added to nested documents:
+ result = mapper2.parse(SourceToParse.source("index2", "type", "1", doc.bytes(), XContentType.JSON));
+ assertEquals(2, result.docs().size());
+ // Nested document:
+ assertNull(result.docs().get(0).getField(UidFieldMapper.NAME));
+ assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
+ assertEquals("1", result.docs().get(0).getField(IdFieldMapper.NAME).stringValue());
+ assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
+ assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
+ assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
+ assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
+ // Root document:
+ assertNull(result.docs().get(1).getField(UidFieldMapper.NAME));
+ assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
+ assertEquals("1", result.docs().get(1).getField(IdFieldMapper.NAME).stringValue());
+ assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
+ assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
+ assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
+ }
+
public void testPropagateDynamicWithExistingMapper() throws Exception {
DocumentMapperParser mapperParser = createIndex("test").mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
@@ -639,7 +726,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.value(0)
.value(1)
.endArray().endObject().bytes();
- MapperParsingException exception = expectThrows(MapperParsingException.class,
+ MapperParsingException exception = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", bytes, XContentType.JSON)));
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
@@ -758,7 +845,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
BytesReference bytes = XContentFactory.jsonBuilder()
.startObject().field("foo.bar.baz", 0)
.endObject().bytes();
- MapperParsingException exception = expectThrows(MapperParsingException.class,
+ MapperParsingException exception = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", bytes, XContentType.JSON)));
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
+ "Existing mapping for [foo] must be of type object but found [long].", exception.getMessage());
@@ -880,7 +967,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
BytesReference bytes = XContentFactory.jsonBuilder().startObject().startObject("foo.bar.baz").field("a", 0).endObject().endObject()
.bytes();
- MapperParsingException exception = expectThrows(MapperParsingException.class,
+ MapperParsingException exception = expectThrows(MapperParsingException.class,
() -> mapper.parse(SourceToParse.source("test", "type", "1", bytes, XContentType.JSON)));
assertEquals("Could not dynamically add mapping for field [foo.bar.baz]. "
@@ -1017,7 +1104,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject()
- .bytes(),
+ .bytes(),
XContentType.JSON));
assertThat(doc.rootDoc().get("test1"), equalTo("value1"));
@@ -1036,7 +1123,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject().endObject()
- .bytes(),
+ .bytes(),
XContentType.JSON));
assertThat(doc.rootDoc().get("type.test1"), equalTo("value1"));
@@ -1056,7 +1143,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject()
- .bytes(),
+ .bytes(),
XContentType.JSON));
assertThat(doc.rootDoc().get("type"), equalTo("value_type"));
@@ -1077,7 +1164,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject().endObject()
- .bytes(),
+ .bytes(),
XContentType.JSON));
assertThat(doc.rootDoc().get("type.type"), equalTo("value_type"));
@@ -1098,7 +1185,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
.field("test2", "value2")
.startObject("inner").field("inner_field", "inner_value").endObject()
.endObject()
- .bytes(),
+ .bytes(),
XContentType.JSON));
// in this case, we analyze the type object as the actual document, and ignore the other same level fields
diff --git a/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java b/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java
index 316e83ad1b..3e4792690a 100644
--- a/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java
+++ b/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java
@@ -57,9 +57,7 @@ import static org.hamcrest.Matchers.startsWith;
public class SimpleNestedIT extends ESIntegTestCase {
public void testSimpleNested() throws Exception {
assertAcked(prepareCreate("test")
- .setSettings("index.mapping.single_type", false)
- .addMapping("type1", "nested1", "type=nested")
- .addMapping("type2", "nested1", "type=nested"));
+ .addMapping("type1", "nested1", "type=nested"));
ensureGreen();
// check on no data, see it works
@@ -158,10 +156,6 @@ public class SimpleNestedIT extends ESIntegTestCase {
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L));
-
- searchResponse = client().prepareSearch("test").setTypes("type1", "type2").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
- assertNoFailures(searchResponse);
- assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L));
}
public void testMultiNested() throws Exception {