summary | refs | log | tree | commit | diff
path: root/plugins/ingest-attachment
diff options
context:
space:
mode:
author: David Pilato <david@pilato.fr> 2017-02-03 15:18:59 +0100
committer: David Pilato <david@pilato.fr> 2017-02-03 15:19:00 +0100
commit: 7a8680c1a4c20b6a2dc68b52e4a59b4c951601f4 (patch)
tree: 0eae257e98fe79727e44fa0eac4681278f3b54b3 /plugins/ingest-attachment
parent: 6e9940283be2b7295b6184404675c68dc6db6559 (diff)
Replace tika-files.zip by a tika-files dir
Let's make our lives easier when debugging/testing. Also, having a flat dir helps us compare or "synchronize" more easily with the Tika project files. Closes #22958.
Diffstat (limited to 'plugins/ingest-attachment')
-rw-r--r--plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/TikaDocTests.java11
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zipbin6363020 -> 0 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/Doc1_ole.doc.zipbin0 -> 58819 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedDocument.docx.zipbin0 -> 12347 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedOutlook.docx.zipbin0 -> 110614 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedPDF.docx.zipbin0 -> 96733 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/NullHeader.docx.zipbin0 -> 4026 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/big-preamble.html.zipbin0 -> 8803 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate-whitespace.html.zipbin0 -> 707 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate.html.zipbin0 -> 663 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/footnotes.docx.zipbin0 -> 9598 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headerPic.docx.zipbin0 -> 13280 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headers.mbox.zipbin0 -> 321 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/pictures.ppt.zipbin0 -> 65806 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protect.xlsx.zipbin0 -> 8380 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedFile.xlsx.zipbin0 -> 8392 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedSheets.xlsx.zipbin0 -> 7605 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/resume.html.zipbin0 -> 3601 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook.msg.zipbin0 -> 9652 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook2003.msg.zipbin0 -> 32570 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test.doc.zipbin0 -> 1550 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testAnnotations.pdf.zipbin0 -> 16515 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBinControlWord.rtf.zipbin0 -> 1470 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBulletPoints.key.zipbin0 -> 93946 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.doc.zipbin0 -> 5659 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.docx.zipbin0 -> 8489 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pdf.zipbin0 -> 61545 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.ppt.zipbin0 -> 24181 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pptx.zipbin0 -> 25648 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.rtf.zipbin0 -> 7650 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xls.zipbin0 -> 3984 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xlsx.zipbin0 -> 7035 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testDOCX_Thumbnail.docx.zipbin0 -> 11130 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-charts.xls.zipbin0 -> 2537 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xls.zipbin0 -> 2003 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xlsx.zipbin0 -> 6142 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xls.zipbin0 -> 3163 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xlsx.zipbin0 -> 6658 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xls.zipbin0 -> 8768 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xlsx.zipbin0 -> 11562 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_4.xls.zipbin0 -> 14890 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_5.xls.zipbin0 -> 1770 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_95.xls.zipbin0 -> 3435 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xls.zipbin0 -> 3876 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xlsx.zipbin0 -> 6232 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xls.zipbin0 -> 262947 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xlsx.zipbin0 -> 344962 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xls.zipbin0 -> 6200 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xlsx.zipbin0 -> 8367 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_textbox.xlsx.zipbin0 -> 7276 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testExtraSpaces.pdf.zipbin0 -> 13483 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFontAfterBufferedText.rtf.zipbin0 -> 351 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.ods.zipbin0 -> 5548 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.odt.zipbin0 -> 6545 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML.html.zipbin0 -> 813 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_1.html.zipbin0 -> 1318 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_2.html.zipbin0 -> 1318 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_3.html.zipbin0 -> 1319 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_4.html.zipbin0 -> 1319 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML_utf8.html.zipbin0 -> 741 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testKeynote.key.zipbin0 -> 136303 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterFooter.odp.zipbin0 -> 12112 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterSlideTable.key.zipbin0 -> 103049 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testNPEOpenDocument.odt.zipbin0 -> 16404 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testODFwithOOo3.odt.zipbin0 -> 19795 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOpenOffice2.odt.zipbin0 -> 25304 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.doc.zipbin0 -> 5326 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.docx.zipbin0 -> 7980 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pdf.zipbin0 -> 38548 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.ppt.zipbin0 -> 24024 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pptx.zipbin0 -> 24401 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.rtf.zipbin0 -> 7289 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOverlappingText.pdf.zipbin0 -> 699 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF-custommetadata.pdf.zipbin0 -> 4709 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF.pdf.zipbin0 -> 32269 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFEmbeddingAndEmbedded.docx.zipbin0 -> 29040 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFFileEmbInAnnotation.pdf.zipbin0 -> 91253 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFPackage.pdf.zipbin0 -> 80520 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTripleLangTitle.pdf.zipbin0 -> 1046 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTwoTextBoxes.pdf.zipbin0 -> 51271 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFVarious.pdf.zipbin0 -> 184983 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_PDFEncodedStringInXMP.pdf.zipbin0 -> 6032 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.10.x.pdf.zipbin0 -> 2317 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.11.x.PDFA-1b.pdf.zipbin0 -> 6195 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.4.x.pdf.zipbin0 -> 2214 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.5.x.pdf.zipbin0 -> 1999 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.6.x.pdf.zipbin0 -> 2259 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.7.x.pdf.zipbin0 -> 2257 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.8.x.pdf.zipbin0 -> 2263 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.9.x.pdf.zipbin0 -> 2315 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_acroform3.pdf.zipbin0 -> 20472 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bom.pdf.zipbin0 -> 6757 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bookmarks.pdf.zipbin0 -> 2857 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_multiFormatEmbFiles.pdf.zipbin0 -> 1133 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_no_accessibility_owner_empty.pdf.zipbin0 -> 864 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_yes_accessibility_owner_empty.pdf.zipbin0 -> 867 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_twoAuthors.pdf.zipbin0 -> 3776 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.potm.zipbin0 -> 31580 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsm.zipbin0 -> 27291 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsx.zipbin0 -> 27270 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppt.zipbin0 -> 4126 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptm.zipbin0 -> 27286 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptx.zipbin0 -> 27267 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPTX_Thumbnail.pptx.zipbin0 -> 33462 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.ppt.zipbin0 -> 41363 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.pptx.zipbin0 -> 48277 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.ppt.zipbin0 -> 44845 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.pptx.zipbin0 -> 34245 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.ppt.zipbin0 -> 22911 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.pptx.zipbin0 -> 22464 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.ppt.zipbin0 -> 25712 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.pptx.zipbin0 -> 28654 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded2.ppt.zipbin0 -> 35998 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded_two_slides.pptx.zipbin0 -> 230839 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.ppt.zipbin0 -> 141124 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.pptx.zipbin0 -> 184949 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.ppt.zipbin0 -> 40404 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.pptx.zipbin0 -> 24470 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.ppt.zipbin0 -> 31891 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.pptx.zipbin0 -> 22862 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.ppt.zipbin0 -> 25013 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.pptx.zipbin0 -> 22826 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.ppt.zipbin0 -> 46538 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.pptx.zipbin0 -> 40129 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPageNumber.pdf.zipbin0 -> 45842 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPages.pages.zipbin0 -> 126169 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesComments.pages.zipbin0 -> 148711 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaLower.pages.zipbin0 -> 161192 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaUpper.pages.zipbin0 -> 161525 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersFootnotes.pages.zipbin0 -> 168829 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanLower.pages.zipbin0 -> 101903 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanUpper.pages.zipbin0 -> 166195 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesLayout.pages.zipbin0 -> 59714 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPopupAnnotation.pdf.zipbin0 -> 4626 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF-ms932.rtf.zipbin0 -> 1774 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF.rtf.zipbin0 -> 1429 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFBoldItalic.rtf.zipbin0 -> 7377 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFControls.rtf.zipbin0 -> 7430 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFCorruptListOverride.rtf.zipbin0 -> 1537 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFEmbeddedLink.rtf.zipbin0 -> 24711 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHexEscapeInsideWord.rtf.zipbin0 -> 372 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHyperlink.rtf.zipbin0 -> 4938 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFIgnoredControlWord.rtf.zipbin0 -> 931 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFInvalidUnicode.rtf.zipbin0 -> 423 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFJapanese.rtf.zipbin0 -> 3926 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListLibreOffice.rtf.zipbin0 -> 1445 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListMicrosoftWord.rtf.zipbin0 -> 8721 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListOverride.rtf.zipbin0 -> 2182 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFNewlines.rtf.zipbin0 -> 1466 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFRegularImages.rtf.zipbin0 -> 51602 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation.rtf.zipbin0 -> 531 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation2.rtf.zipbin0 -> 373 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces.rtf.zipbin0 -> 350 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces2.rtf.zipbin0 -> 382 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeGothic.rtf.zipbin0 -> 358 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeUCNControlWordCharacterDoubling.rtf.zipbin0 -> 523 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFVarious.rtf.zipbin0 -> 11810 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWindowsCodepage1250.rtf.zipbin0 -> 410 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWithCurlyBraces.rtf.zipbin0 -> 2329 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWord2010CzechCharacters.rtf.zipbin0 -> 8101 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWordPadCzechCharacters.rtf.zipbin0 -> 723 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testStyles.odt.zipbin0 -> 9306 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTXTNonASCIIUTF8.txt.zipbin0 -> 381 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTables.key.zipbin0 -> 99786 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTextBoxes.key.zipbin0 -> 96429 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.doc.zipbin0 -> 8011 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.docx.zipbin0 -> 11114 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD6.doc.zipbin0 -> 1378 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.doc.zipbin0 -> 6514 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.docx.zipbin0 -> 7842 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.doc.zipbin0 -> 27218 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.docx.zipbin0 -> 30658 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.doc.zipbin0 -> 5345 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.docx.zipbin0 -> 10292 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.doc.zipbin0 -> 5342 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.docx.zipbin0 -> 10308 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_closingSmartQInHyperLink.doc.zipbin0 -> 5833 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.doc.zipbin0 -> 5792 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.docx.zipbin0 -> 10994 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_pdf.docx.zipbin0 -> 26547 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_rtf.doc.zipbin0 -> 3612 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.doc.zipbin0 -> 238281 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.docx.zipbin0 -> 152189 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_header_hyperlink.doc.zipbin0 -> 6000 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_ooxml_bean1.docx.zipbin0 -> 12962 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_text.docx.zipbin0 -> 27021 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.doc.zipbin0 -> 16153 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.docx.zipbin0 -> 32657 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_null_style.docx.zipbin0 -> 20449 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.doc.zipbin0 -> 11035 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.docx.zipbin0 -> 21164 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.doc.zipbin0 -> 8702 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.docx.zipbin0 -> 13146 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_tabular_symbol.doc.zipbin0 -> 2213 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_text_box.docx.zipbin0 -> 22376 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.doc.zipbin0 -> 8971 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.docx.zipbin0 -> 15740 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWordArt.pptx.zipbin0 -> 29318 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXHTML.html.zipbin0 -> 822 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXLSX_Thumbnail.xlsx.zipbin0 -> 7379 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML.xml.zipbin0 -> 1000 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML2.xml.zipbin0 -> 343 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML3.xml.zipbin0 -> 409 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_TIKA-1251.doc.zipbin0 -> 12415 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_package.rtf.zipbin0 -> 4106 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_zip.pptx.zipbin0 -> 263698 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_list_override.rtf.zipbin0 -> 486 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded.docx.zipbin0 -> 22127 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded_npe.docx.zipbin0 -> 22871 bytes
-rw-r--r--plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/tika434.html.zipbin0 -> 11019 bytes
210 files changed, 9 insertions, 2 deletions
diff --git a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/TikaDocTests.java b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/TikaDocTests.java
index 4b9a40dd8a..5535433144 100644
--- a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/TikaDocTests.java
+++ b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/TikaDocTests.java
@@ -27,6 +27,7 @@ import org.elasticsearch.test.ESTestCase;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.Paths;
/**
* Evil test-coverage cheat, we parse a bunch of docs from tika
@@ -37,11 +38,17 @@ import java.nio.file.Path;
public class TikaDocTests extends ESTestCase {
/** some test files from tika test suite, zipped up */
- static final String TIKA_FILES = "/org/elasticsearch/ingest/attachment/test/tika-files.zip";
+ static final String TIKA_FILES = "/org/elasticsearch/ingest/attachment/test/tika-files/";
public void testFiles() throws Exception {
Path tmp = createTempDir();
- TestUtil.unzip(getClass().getResourceAsStream(TIKA_FILES), tmp);
+ logger.debug("unzipping all tika sample files");
+ try (DirectoryStream<Path> stream = Files.newDirectoryStream(Paths.get(getClass().getResource(TIKA_FILES).toURI()))) {
+ for (Path doc : stream) {
+ String filename = doc.getFileName().toString();
+ TestUtil.unzip(getClass().getResourceAsStream(TIKA_FILES + filename), tmp);
+ }
+ }
try (DirectoryStream<Path> stream = Files.newDirectoryStream(tmp)) {
for (Path doc : stream) {
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip
deleted file mode 100644
index cfc2e54b79..0000000000
--- a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip
+++ /dev/null
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/Doc1_ole.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/Doc1_ole.doc.zip
new file mode 100644
index 0000000000..f9bd7c46b2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/Doc1_ole.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedDocument.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedDocument.docx.zip
new file mode 100644
index 0000000000..bb4e8e1ef3
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedDocument.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedOutlook.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedOutlook.docx.zip
new file mode 100644
index 0000000000..67de787f01
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedOutlook.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedPDF.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedPDF.docx.zip
new file mode 100644
index 0000000000..7e02f55c03
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/EmbeddedPDF.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/NullHeader.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/NullHeader.docx.zip
new file mode 100644
index 0000000000..0516c35b07
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/NullHeader.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/big-preamble.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/big-preamble.html.zip
new file mode 100644
index 0000000000..fc851923ab
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/big-preamble.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate-whitespace.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate-whitespace.html.zip
new file mode 100644
index 0000000000..2d1f8e601c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate-whitespace.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate.html.zip
new file mode 100644
index 0000000000..c728a6303b
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/boilerplate.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/footnotes.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/footnotes.docx.zip
new file mode 100644
index 0000000000..21376b6917
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/footnotes.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headerPic.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headerPic.docx.zip
new file mode 100644
index 0000000000..6bb9b3f463
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headerPic.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headers.mbox.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headers.mbox.zip
new file mode 100644
index 0000000000..f36e6b8de4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/headers.mbox.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/pictures.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/pictures.ppt.zip
new file mode 100644
index 0000000000..55dde5eada
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/pictures.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protect.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protect.xlsx.zip
new file mode 100644
index 0000000000..9823a8debf
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protect.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedFile.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedFile.xlsx.zip
new file mode 100644
index 0000000000..b4de09b0fd
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedFile.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedSheets.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedSheets.xlsx.zip
new file mode 100644
index 0000000000..8dde8a493a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/protectedSheets.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/resume.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/resume.html.zip
new file mode 100644
index 0000000000..0c53eae3b3
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/resume.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook.msg.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook.msg.zip
new file mode 100644
index 0000000000..e9a0bfbed0
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook.msg.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook2003.msg.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook2003.msg.zip
new file mode 100644
index 0000000000..bce3da6e05
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test-outlook2003.msg.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test.doc.zip
new file mode 100644
index 0000000000..4bcf68dceb
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testAnnotations.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testAnnotations.pdf.zip
new file mode 100644
index 0000000000..3268c952a3
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testAnnotations.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBinControlWord.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBinControlWord.rtf.zip
new file mode 100644
index 0000000000..4a7efa19f0
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBinControlWord.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBulletPoints.key.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBulletPoints.key.zip
new file mode 100644
index 0000000000..41b896525c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testBulletPoints.key.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.doc.zip
new file mode 100644
index 0000000000..15a056cb3a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.docx.zip
new file mode 100644
index 0000000000..30b4f6877a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pdf.zip
new file mode 100644
index 0000000000..bb21a9d710
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.ppt.zip
new file mode 100644
index 0000000000..e0122d4004
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pptx.zip
new file mode 100644
index 0000000000..77967db80c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.rtf.zip
new file mode 100644
index 0000000000..b7b6bdf5c1
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xls.zip
new file mode 100644
index 0000000000..7250235b89
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xlsx.zip
new file mode 100644
index 0000000000..7fe71de731
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testComment.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testDOCX_Thumbnail.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testDOCX_Thumbnail.docx.zip
new file mode 100644
index 0000000000..e0fd82103f
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testDOCX_Thumbnail.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-charts.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-charts.xls.zip
new file mode 100644
index 0000000000..fd5d8472d4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-charts.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xls.zip
new file mode 100644
index 0000000000..f1015248b6
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xlsx.zip
new file mode 100644
index 0000000000..b62c8813e9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL-formats.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xls.zip
new file mode 100644
index 0000000000..f0dc3b00f9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xlsx.zip
new file mode 100644
index 0000000000..45b3dae6f4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xls.zip
new file mode 100644
index 0000000000..09cb216169
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xlsx.zip
new file mode 100644
index 0000000000..8122866182
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_1img.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_4.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_4.xls.zip
new file mode 100644
index 0000000000..46da553af9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_4.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_5.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_5.xls.zip
new file mode 100644
index 0000000000..ca3e08c470
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_5.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_95.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_95.xls.zip
new file mode 100644
index 0000000000..00e3d373d1
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_95.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xls.zip
new file mode 100644
index 0000000000..2fa4d6b8b9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xlsx.zip
new file mode 100644
index 0000000000..9cfd16cec2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_custom_props.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xls.zip
new file mode 100644
index 0000000000..b7291cafc9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xlsx.zip
new file mode 100644
index 0000000000..755b7395e8
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_embeded.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xls.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xls.zip
new file mode 100644
index 0000000000..6cb18625ca
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xls.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xlsx.zip
new file mode 100644
index 0000000000..924ad5eece
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_headers_footers.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_textbox.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_textbox.xlsx.zip
new file mode 100644
index 0000000000..43046dff52
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testEXCEL_textbox.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testExtraSpaces.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testExtraSpaces.pdf.zip
new file mode 100644
index 0000000000..8f6ed7a19f
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testExtraSpaces.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFontAfterBufferedText.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFontAfterBufferedText.rtf.zip
new file mode 100644
index 0000000000..77a5ac60dc
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFontAfterBufferedText.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.ods.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.ods.zip
new file mode 100644
index 0000000000..8659af1e69
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.ods.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.odt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.odt.zip
new file mode 100644
index 0000000000..85ba550144
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testFooter.odt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML.html.zip
new file mode 100644
index 0000000000..58cfd63825
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_1.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_1.html.zip
new file mode 100644
index 0000000000..a12735a520
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_1.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_2.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_2.html.zip
new file mode 100644
index 0000000000..89a3f9d7aa
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_2.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_3.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_3.html.zip
new file mode 100644
index 0000000000..da23b13ee4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_3.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_4.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_4.html.zip
new file mode 100644
index 0000000000..15850474d9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTMLNoisyMetaEncoding_4.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML_utf8.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML_utf8.html.zip
new file mode 100644
index 0000000000..bdb5e58f37
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testHTML_utf8.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testKeynote.key.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testKeynote.key.zip
new file mode 100644
index 0000000000..68693369d1
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testKeynote.key.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterFooter.odp.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterFooter.odp.zip
new file mode 100644
index 0000000000..42467aa951
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterFooter.odp.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterSlideTable.key.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterSlideTable.key.zip
new file mode 100644
index 0000000000..8d7803d396
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testMasterSlideTable.key.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testNPEOpenDocument.odt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testNPEOpenDocument.odt.zip
new file mode 100644
index 0000000000..4c41bc7a46
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testNPEOpenDocument.odt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testODFwithOOo3.odt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testODFwithOOo3.odt.zip
new file mode 100644
index 0000000000..9202fe44e4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testODFwithOOo3.odt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOpenOffice2.odt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOpenOffice2.odt.zip
new file mode 100644
index 0000000000..1993365b5d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOpenOffice2.odt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.doc.zip
new file mode 100644
index 0000000000..b9f515941e
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.docx.zip
new file mode 100644
index 0000000000..033871556d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pdf.zip
new file mode 100644
index 0000000000..37fab93905
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.ppt.zip
new file mode 100644
index 0000000000..e50261f432
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pptx.zip
new file mode 100644
index 0000000000..19d6e78e6d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.rtf.zip
new file mode 100644
index 0000000000..d86048798c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOptionalHyphen.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOverlappingText.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOverlappingText.pdf.zip
new file mode 100644
index 0000000000..c7f8b78704
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testOverlappingText.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF-custommetadata.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF-custommetadata.pdf.zip
new file mode 100644
index 0000000000..44865eca76
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF-custommetadata.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF.pdf.zip
new file mode 100644
index 0000000000..fd5195b053
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFEmbeddingAndEmbedded.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFEmbeddingAndEmbedded.docx.zip
new file mode 100644
index 0000000000..a7ceed2066
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFEmbeddingAndEmbedded.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFFileEmbInAnnotation.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFFileEmbInAnnotation.pdf.zip
new file mode 100644
index 0000000000..4b6e333895
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFFileEmbInAnnotation.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFPackage.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFPackage.pdf.zip
new file mode 100644
index 0000000000..8bbb9166bb
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFPackage.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTripleLangTitle.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTripleLangTitle.pdf.zip
new file mode 100644
index 0000000000..2d1ebdbdb2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTripleLangTitle.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTwoTextBoxes.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTwoTextBoxes.pdf.zip
new file mode 100644
index 0000000000..d05907a3da
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFTwoTextBoxes.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFVarious.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFVarious.pdf.zip
new file mode 100644
index 0000000000..cf86cbae79
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDFVarious.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_PDFEncodedStringInXMP.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_PDFEncodedStringInXMP.pdf.zip
new file mode 100644
index 0000000000..e8377846e7
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_PDFEncodedStringInXMP.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.10.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.10.x.pdf.zip
new file mode 100644
index 0000000000..4fffed77ad
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.10.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.11.x.PDFA-1b.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.11.x.PDFA-1b.pdf.zip
new file mode 100644
index 0000000000..16098515ce
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.11.x.PDFA-1b.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.4.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.4.x.pdf.zip
new file mode 100644
index 0000000000..cdf9334757
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.4.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.5.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.5.x.pdf.zip
new file mode 100644
index 0000000000..e5ef413b8b
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.5.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.6.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.6.x.pdf.zip
new file mode 100644
index 0000000000..b57b4dc4ef
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.6.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.7.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.7.x.pdf.zip
new file mode 100644
index 0000000000..d69bbb50fe
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.7.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.8.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.8.x.pdf.zip
new file mode 100644
index 0000000000..e077af2074
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.8.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.9.x.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.9.x.pdf.zip
new file mode 100644
index 0000000000..c9b6857c15
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_Version.9.x.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_acroform3.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_acroform3.pdf.zip
new file mode 100644
index 0000000000..267d41df31
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_acroform3.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bom.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bom.pdf.zip
new file mode 100644
index 0000000000..c4ea31f96f
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bom.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bookmarks.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bookmarks.pdf.zip
new file mode 100644
index 0000000000..88a5db5e63
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_bookmarks.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_multiFormatEmbFiles.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_multiFormatEmbFiles.pdf.zip
new file mode 100644
index 0000000000..e61f2bf111
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_multiFormatEmbFiles.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_no_accessibility_owner_empty.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_no_accessibility_owner_empty.pdf.zip
new file mode 100644
index 0000000000..16920b49e3
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_no_accessibility_owner_empty.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_yes_accessibility_owner_empty.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_yes_accessibility_owner_empty.pdf.zip
new file mode 100644
index 0000000000..59887f7e5d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_no_extract_yes_accessibility_owner_empty.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_twoAuthors.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_twoAuthors.pdf.zip
new file mode 100644
index 0000000000..d240c51d82
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPDF_twoAuthors.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.potm.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.potm.zip
new file mode 100644
index 0000000000..0f87b774a2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.potm.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsm.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsm.zip
new file mode 100644
index 0000000000..47f652d14a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsm.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsx.zip
new file mode 100644
index 0000000000..0f21f13be0
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppt.zip
new file mode 100644
index 0000000000..00c5c88c6e
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptm.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptm.zip
new file mode 100644
index 0000000000..edd3e444fd
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptm.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptx.zip
new file mode 100644
index 0000000000..68a9d66275
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPTX_Thumbnail.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPTX_Thumbnail.pptx.zip
new file mode 100644
index 0000000000..23dba29f32
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPTX_Thumbnail.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.ppt.zip
new file mode 100644
index 0000000000..317e1bbe35
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.pptx.zip
new file mode 100644
index 0000000000..62096338a4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_2imgs.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.ppt.zip
new file mode 100644
index 0000000000..5bd71fbd2e
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.pptx.zip
new file mode 100644
index 0000000000..811ab85cc2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_autodate.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.ppt.zip
new file mode 100644
index 0000000000..3e94ba4cce
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.pptx.zip
new file mode 100644
index 0000000000..68d2135d41
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_comment.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.ppt.zip
new file mode 100644
index 0000000000..80673e625d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.pptx.zip
new file mode 100644
index 0000000000..d27c98d2c2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_custom_props.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded2.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded2.ppt.zip
new file mode 100644
index 0000000000..017f9f500c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded2.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded_two_slides.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded_two_slides.pptx.zip
new file mode 100644
index 0000000000..dfa459cccb
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embedded_two_slides.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.ppt.zip
new file mode 100644
index 0000000000..fb8a3caf75
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.pptx.zip
new file mode 100644
index 0000000000..f33f463c33
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_embeded.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.ppt.zip
new file mode 100644
index 0000000000..6a72788939
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.pptx.zip
new file mode 100644
index 0000000000..992dad41c5
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterFooter.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.ppt.zip
new file mode 100644
index 0000000000..e31c4719f4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.pptx.zip
new file mode 100644
index 0000000000..1fa4f48748
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.ppt.zip
new file mode 100644
index 0000000000..f58055ecfc
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.pptx.zip
new file mode 100644
index 0000000000..f8ff31052d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_masterText2.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.ppt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.ppt.zip
new file mode 100644
index 0000000000..5d13efb8d4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.ppt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.pptx.zip
new file mode 100644
index 0000000000..5d146a1de0
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPPT_various.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPageNumber.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPageNumber.pdf.zip
new file mode 100644
index 0000000000..4f9db82c29
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPageNumber.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPages.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPages.pages.zip
new file mode 100644
index 0000000000..3db77c0873
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPages.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesComments.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesComments.pages.zip
new file mode 100644
index 0000000000..55004e812d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesComments.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaLower.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaLower.pages.zip
new file mode 100644
index 0000000000..bfea0c7825
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaLower.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaUpper.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaUpper.pages.zip
new file mode 100644
index 0000000000..3298d7eaa8
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersAlphaUpper.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersFootnotes.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersFootnotes.pages.zip
new file mode 100644
index 0000000000..34a4fb8323
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersFootnotes.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanLower.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanLower.pages.zip
new file mode 100644
index 0000000000..b48ebe4458
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanLower.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanUpper.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanUpper.pages.zip
new file mode 100644
index 0000000000..967e139595
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesHeadersFootersRomanUpper.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesLayout.pages.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesLayout.pages.zip
new file mode 100644
index 0000000000..c6f0e04a53
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPagesLayout.pages.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPopupAnnotation.pdf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPopupAnnotation.pdf.zip
new file mode 100644
index 0000000000..4eb7865356
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testPopupAnnotation.pdf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF-ms932.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF-ms932.rtf.zip
new file mode 100644
index 0000000000..bd80f2b061
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF-ms932.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF.rtf.zip
new file mode 100644
index 0000000000..72ef8ebfc5
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTF.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFBoldItalic.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFBoldItalic.rtf.zip
new file mode 100644
index 0000000000..935c249176
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFBoldItalic.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFControls.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFControls.rtf.zip
new file mode 100644
index 0000000000..0c55479cc8
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFControls.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFCorruptListOverride.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFCorruptListOverride.rtf.zip
new file mode 100644
index 0000000000..e6ea82d476
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFCorruptListOverride.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFEmbeddedLink.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFEmbeddedLink.rtf.zip
new file mode 100644
index 0000000000..857c589d9e
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFEmbeddedLink.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHexEscapeInsideWord.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHexEscapeInsideWord.rtf.zip
new file mode 100644
index 0000000000..f10b768d5a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHexEscapeInsideWord.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHyperlink.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHyperlink.rtf.zip
new file mode 100644
index 0000000000..658eb87784
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFHyperlink.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFIgnoredControlWord.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFIgnoredControlWord.rtf.zip
new file mode 100644
index 0000000000..15a74d726b
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFIgnoredControlWord.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFInvalidUnicode.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFInvalidUnicode.rtf.zip
new file mode 100644
index 0000000000..f24a5ab9c0
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFInvalidUnicode.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFJapanese.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFJapanese.rtf.zip
new file mode 100644
index 0000000000..8bf4f6e716
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFJapanese.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListLibreOffice.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListLibreOffice.rtf.zip
new file mode 100644
index 0000000000..e2a406d99a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListLibreOffice.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListMicrosoftWord.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListMicrosoftWord.rtf.zip
new file mode 100644
index 0000000000..155718ac8b
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListMicrosoftWord.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListOverride.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListOverride.rtf.zip
new file mode 100644
index 0000000000..c9b7f3fdbe
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFListOverride.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFNewlines.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFNewlines.rtf.zip
new file mode 100644
index 0000000000..8b0dffe135
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFNewlines.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFRegularImages.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFRegularImages.rtf.zip
new file mode 100644
index 0000000000..2bf8d9b530
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFRegularImages.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation.rtf.zip
new file mode 100644
index 0000000000..5e110de516
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation2.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation2.rtf.zip
new file mode 100644
index 0000000000..599e7f22ed
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFTableCellSeparation2.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces.rtf.zip
new file mode 100644
index 0000000000..f3cbec9e51
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces2.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces2.rtf.zip
new file mode 100644
index 0000000000..2c905405ca
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUmlautSpaces2.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeGothic.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeGothic.rtf.zip
new file mode 100644
index 0000000000..40263351a0
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeGothic.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeUCNControlWordCharacterDoubling.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeUCNControlWordCharacterDoubling.rtf.zip
new file mode 100644
index 0000000000..c8f40b549c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFUnicodeUCNControlWordCharacterDoubling.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFVarious.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFVarious.rtf.zip
new file mode 100644
index 0000000000..60281e7f9a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFVarious.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWindowsCodepage1250.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWindowsCodepage1250.rtf.zip
new file mode 100644
index 0000000000..4a7852c97d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWindowsCodepage1250.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWithCurlyBraces.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWithCurlyBraces.rtf.zip
new file mode 100644
index 0000000000..48268a457c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWithCurlyBraces.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWord2010CzechCharacters.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWord2010CzechCharacters.rtf.zip
new file mode 100644
index 0000000000..be845e4f92
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWord2010CzechCharacters.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWordPadCzechCharacters.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWordPadCzechCharacters.rtf.zip
new file mode 100644
index 0000000000..508801675d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testRTFWordPadCzechCharacters.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testStyles.odt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testStyles.odt.zip
new file mode 100644
index 0000000000..22f6c76b9a
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testStyles.odt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTXTNonASCIIUTF8.txt.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTXTNonASCIIUTF8.txt.zip
new file mode 100644
index 0000000000..7326a28ad7
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTXTNonASCIIUTF8.txt.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTables.key.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTables.key.zip
new file mode 100644
index 0000000000..3c963dcc07
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTables.key.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTextBoxes.key.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTextBoxes.key.zip
new file mode 100644
index 0000000000..aea9b1cf66
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testTextBoxes.key.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.doc.zip
new file mode 100644
index 0000000000..38b2954a4f
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.docx.zip
new file mode 100644
index 0000000000..2ed0aae33f
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD6.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD6.doc.zip
new file mode 100644
index 0000000000..b8dbbd8322
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD6.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.doc.zip
new file mode 100644
index 0000000000..1baa330e5c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.docx.zip
new file mode 100644
index 0000000000..8920521bd2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_1img.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.doc.zip
new file mode 100644
index 0000000000..b60716b93d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.docx.zip
new file mode 100644
index 0000000000..062713d931
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_3imgs.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.doc.zip
new file mode 100644
index 0000000000..b0a6af23d3
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.docx.zip
new file mode 100644
index 0000000000..546f98bf09
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.doc.zip
new file mode 100644
index 0000000000..5dbedec5e8
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.docx.zip
new file mode 100644
index 0000000000..8edde24f99
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_bold_character_runs2.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_closingSmartQInHyperLink.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_closingSmartQInHyperLink.doc.zip
new file mode 100644
index 0000000000..4484a6b005
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_closingSmartQInHyperLink.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.doc.zip
new file mode 100644
index 0000000000..72a94832d3
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.docx.zip
new file mode 100644
index 0000000000..30aaef7d81
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_custom_props.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_pdf.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_pdf.docx.zip
new file mode 100644
index 0000000000..2b74f67dc9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_pdf.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_rtf.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_rtf.doc.zip
new file mode 100644
index 0000000000..e456721065
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embedded_rtf.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.doc.zip
new file mode 100644
index 0000000000..699a13b652
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.docx.zip
new file mode 100644
index 0000000000..b53088ddb7
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_embeded.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_header_hyperlink.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_header_hyperlink.doc.zip
new file mode 100644
index 0000000000..867dc1808f
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_header_hyperlink.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_ooxml_bean1.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_ooxml_bean1.docx.zip
new file mode 100644
index 0000000000..04172a6538
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_ooxml_bean1.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_text.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_text.docx.zip
new file mode 100644
index 0000000000..325ecce927
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_missing_text.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.doc.zip
new file mode 100644
index 0000000000..38eecb941d
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.docx.zip
new file mode 100644
index 0000000000..1c19cf3e98
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_no_format.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_null_style.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_null_style.docx.zip
new file mode 100644
index 0000000000..605eeb2502
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_null_style.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.doc.zip
new file mode 100644
index 0000000000..7f8d3b9ad2
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.docx.zip
new file mode 100644
index 0000000000..42b4ed17c4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_numbered_list.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.doc.zip
new file mode 100644
index 0000000000..39c954b2d1
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.docx.zip
new file mode 100644
index 0000000000..77bb9dbda6
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_override_list_numbering.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_tabular_symbol.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_tabular_symbol.doc.zip
new file mode 100644
index 0000000000..0c63f98256
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_tabular_symbol.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_text_box.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_text_box.docx.zip
new file mode 100644
index 0000000000..504c56728c
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_text_box.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.doc.zip
new file mode 100644
index 0000000000..9d7c24e052
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.docx.zip
new file mode 100644
index 0000000000..393aca11ab
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWORD_various.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWordArt.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWordArt.pptx.zip
new file mode 100644
index 0000000000..75234fde98
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testWordArt.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXHTML.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXHTML.html.zip
new file mode 100644
index 0000000000..45c4dd0479
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXHTML.html.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXLSX_Thumbnail.xlsx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXLSX_Thumbnail.xlsx.zip
new file mode 100644
index 0000000000..919cf31e75
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXLSX_Thumbnail.xlsx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML.xml.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML.xml.zip
new file mode 100644
index 0000000000..a6520a8a81
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML.xml.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML2.xml.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML2.xml.zip
new file mode 100644
index 0000000000..5b89863685
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML2.xml.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML3.xml.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML3.xml.zip
new file mode 100644
index 0000000000..08a066d9c1
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/testXML3.xml.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_TIKA-1251.doc.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_TIKA-1251.doc.zip
new file mode 100644
index 0000000000..4d30712902
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_TIKA-1251.doc.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_package.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_package.rtf.zip
new file mode 100644
index 0000000000..58d7f15b23
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_package.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_zip.pptx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_zip.pptx.zip
new file mode 100644
index 0000000000..e9d802cfd4
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_embedded_zip.pptx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_list_override.rtf.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_list_override.rtf.zip
new file mode 100644
index 0000000000..57d5553829
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_list_override.rtf.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded.docx.zip
new file mode 100644
index 0000000000..edd97967e9
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded_npe.docx.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded_npe.docx.zip
new file mode 100644
index 0000000000..84da48f025
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/test_recursive_embedded_npe.docx.zip
Binary files differ
diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/tika434.html.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/tika434.html.zip
new file mode 100644
index 0000000000..41972bc844
--- /dev/null
+++ b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files/tika434.html.zip
Binary files differ