diff options
author | Tal Levy <JubBoy333@gmail.com> | 2016-12-20 10:53:28 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-12-20 10:53:28 -0800 |
commit | 5a90d9d7e6171d34b93ffe284a1512c9047d4413 (patch) | |
tree | 03ff169df948a031cc670a30c92e08b355ac4680 /plugins/ingest-attachment | |
parent | ad4b1ecdebf7abc19b1faa6577844425088a5719 (diff) |
add `ignore_missing` flag to ingest plugins (#22273)
added `ignore_missing` flag to:
- Attachment Processor
- GeoIP Processor
- User-Agent Processor
Diffstat (limited to 'plugins/ingest-attachment')
3 files changed, 77 insertions, 5 deletions
diff --git a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java index 0ba79ecc9f..f7f474711b 100644 --- a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java +++ b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java @@ -38,6 +38,7 @@ import java.util.Map; import java.util.Set; import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; +import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty; import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList; import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty; @@ -52,23 +53,36 @@ public final class AttachmentProcessor extends AbstractProcessor { private final String targetField; private final Set<Property> properties; private final int indexedChars; + private final boolean ignoreMissing; AttachmentProcessor(String tag, String field, String targetField, Set<Property> properties, - int indexedChars) throws IOException { + int indexedChars, boolean ignoreMissing) throws IOException { super(tag); this.field = field; this.targetField = targetField; this.properties = properties; this.indexedChars = indexedChars; + this.ignoreMissing = ignoreMissing; + } + + boolean isIgnoreMissing() { + return ignoreMissing; } @Override public void execute(IngestDocument ingestDocument) { Map<String, Object> additionalFields = new HashMap<>(); + byte[] input = ingestDocument.getFieldValueAsBytes(field, ignoreMissing); + + if (input == null && ignoreMissing) { + return; + } else if (input == null) { + throw new IllegalArgumentException("field [" + field + "] is null, cannot parse."); + } + try { Metadata metadata = new Metadata(); - byte[] input = ingestDocument.getFieldValueAsBytes(field); String parsedContent = TikaImpl.parse(input, metadata, indexedChars); if (properties.contains(Property.CONTENT) && Strings.hasLength(parsedContent)) { @@ -166,6 +180,7 @@ public final class AttachmentProcessor extends AbstractProcessor { String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "attachment"); List<String> properyNames = readOptionalList(TYPE, processorTag, config, "properties"); int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED); + boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); final Set<Property> properties; if (properyNames != null) { @@ -182,7 +197,7 @@ public final class AttachmentProcessor extends AbstractProcessor { properties = DEFAULT_PROPERTIES; } - return new AttachmentProcessor(processorTag, field, targetField, properties, indexedChars); + return new AttachmentProcessor(processorTag, field, targetField, properties, indexedChars, ignoreMissing); } } diff --git a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java index 2848d6c3c6..376214eb17 100644 --- a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java +++ b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java @@ -52,6 +52,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getTargetField(), equalTo("attachment")); assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); + assertFalse(processor.isIgnoreMissing()); } public void testConfigureIndexedChars() throws Exception { @@ -64,6 +65,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { AttachmentProcessor processor = factory.create(null, processorTag, config); assertThat(processor.getTag(), equalTo(processorTag)); assertThat(processor.getIndexedChars(), is(indexedChars)); + assertFalse(processor.isIgnoreMissing()); } public void testBuildTargetField() throws Exception { @@ -73,6 +75,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { AttachmentProcessor processor = factory.create(null, null, config); assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getTargetField(), equalTo("_field")); + assertFalse(processor.isIgnoreMissing()); } public void testBuildFields() throws Exception { @@ -90,6 +93,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { AttachmentProcessor processor = factory.create(null, null, config); assertThat(processor.getField(), equalTo("_field")); assertThat(processor.getProperties(), equalTo(properties)); + assertFalse(processor.isIgnoreMissing()); } public void testBuildIllegalFieldOption() throws Exception { @@ -117,4 +121,19 @@ public class AttachmentProcessorFactoryTests extends ESTestCase { assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]")); } } + + public void testIgnoreMissing() throws Exception { + Map<String, Object> config = new HashMap<>(); + config.put("field", "_field"); + config.put("ignore_missing", true); + + String processorTag = randomAsciiOfLength(10); + + AttachmentProcessor processor = factory.create(null, processorTag, config); + assertThat(processor.getTag(), equalTo(processorTag)); + assertThat(processor.getField(), equalTo("_field")); + assertThat(processor.getTargetField(), equalTo("attachment")); + assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); + assertTrue(processor.isIgnoreMissing()); + } } diff --git a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java index 3708a290de..b59457b5b0 100644 --- a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java +++ b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java @@ -22,6 +22,7 @@ package org.elasticsearch.ingest.attachment; import org.apache.commons.io.IOUtils; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.RandomDocumentPicks; import org.elasticsearch.test.ESTestCase; import org.junit.Before; @@ -30,14 +31,17 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Base64; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; @@ -52,7 +56,7 @@ public class AttachmentProcessorTests extends ESTestCase { @Before public void createStandardProcessor() throws IOException { processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", - "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000); + "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false); } public void testEnglishTextDocument() throws Exception { @@ -85,7 +89,7 @@ public class AttachmentProcessorTests extends ESTestCase { selectedProperties.add(AttachmentProcessor.Property.DATE); } processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", - "target_field", selectedProperties, 10000); + "target_field", selectedProperties, 10000, false); Map<String, Object> attachmentData = parseDocument("htmlWithEmptyDateMeta.html", processor); assertThat(attachmentData.keySet(), hasSize(selectedFieldNames.length)); @@ -199,6 +203,40 @@ public class AttachmentProcessorTests extends ESTestCase { assertThat(attachmentData.get("content_length"), is(notNullValue())); } + public void testNullValueWithIgnoreMissing() throws Exception { + IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), + Collections.singletonMap("source_field", null)); + IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); + Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, true); + processor.execute(ingestDocument); + assertIngestDocument(originalIngestDocument, ingestDocument); + } + + public void testNonExistentWithIgnoreMissing() throws Exception { + IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap()); + IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); + Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, true); + processor.execute(ingestDocument); + assertIngestDocument(originalIngestDocument, ingestDocument); + } + + public void testNullWithoutIgnoreMissing() throws Exception { + IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), + Collections.singletonMap("source_field", null)); + IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); + Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, false); + Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); + assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot parse.")); + } + + public void testNonExistentWithoutIgnoreMissing() throws Exception { + IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap()); + IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); + Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, false); + Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); + assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]")); + } + private Map<String, Object> parseDocument(String file, AttachmentProcessor processor) throws Exception { Map<String, Object> document = new HashMap<>(); document.put("source_field", getAsBase64(file)); |