Forráskód Böngészése

Update Tika version to 1.15

This commit upgrades the Tika dependency to version 1.15.

Relates #25003
Guillaume Le Floch 8 éve
szülő
commit
ac5fd6a7d9
25 módosított fájl, 284 hozzáadás és 39 törlés
  1. 26 29
      plugins/ingest-attachment/build.gradle
  2. 1 0
      plugins/ingest-attachment/licenses/commons-collections4-4.1.jar.sha1
  3. 201 0
      plugins/ingest-attachment/licenses/commons-collections4-LICENSE.txt
  4. 5 0
      plugins/ingest-attachment/licenses/commons-collections4-NOTICE.txt
  5. 0 1
      plugins/ingest-attachment/licenses/commons-compress-1.10.jar.sha1
  6. 1 0
      plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1
  7. 0 1
      plugins/ingest-attachment/licenses/poi-3.15.jar.sha1
  8. 1 0
      plugins/ingest-attachment/licenses/poi-3.16.jar.sha1
  9. 0 1
      plugins/ingest-attachment/licenses/poi-ooxml-3.15.jar.sha1
  10. 1 0
      plugins/ingest-attachment/licenses/poi-ooxml-3.16.jar.sha1
  11. 0 1
      plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.15.jar.sha1
  12. 1 0
      plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.16.jar.sha1
  13. 0 1
      plugins/ingest-attachment/licenses/poi-scratchpad-3.15.jar.sha1
  14. 1 0
      plugins/ingest-attachment/licenses/poi-scratchpad-3.16.jar.sha1
  15. 1 0
      plugins/ingest-attachment/licenses/slf4j-api-1.6.2.jar.sha1
  16. 21 0
      plugins/ingest-attachment/licenses/slf4j-api-LICENSE.txt
  17. 0 0
      plugins/ingest-attachment/licenses/slf4j-api-NOTICE.txt
  18. 0 1
      plugins/ingest-attachment/licenses/tika-core-1.14.jar.sha1
  19. 1 0
      plugins/ingest-attachment/licenses/tika-core-1.15.jar.sha1
  20. 0 1
      plugins/ingest-attachment/licenses/tika-parsers-1.14.jar.sha1
  21. 1 0
      plugins/ingest-attachment/licenses/tika-parsers-1.15.jar.sha1
  22. 1 0
      plugins/ingest-attachment/licenses/xz-1.6.jar.sha1
  23. 9 0
      plugins/ingest-attachment/licenses/xz-LICENSE.txt
  24. 1 0
      plugins/ingest-attachment/licenses/xz-NOTICE.txt
  25. 11 3
      plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java

+ 26 - 29
plugins/ingest-attachment/build.gradle

@@ -23,10 +23,10 @@ esplugin {
 }
 
 versions << [
-  'tika': '1.14',
+  'tika': '1.15',
   'pdfbox': '2.0.3',
   'bouncycastle': '1.55',
-  'poi': '3.15',
+  'poi': '3.16',
   'mime4j': '0.7.2'
 ]
 
@@ -34,7 +34,9 @@ dependencies {
   // mandatory for tika
   compile "org.apache.tika:tika-core:${versions.tika}"
   compile "org.apache.tika:tika-parsers:${versions.tika}"
+  compile 'org.tukaani:xz:1.6'
   compile 'commons-io:commons-io:2.4'
+  compile "org.slf4j:slf4j-api:${versions.slf4j}"
 
   // character set detection
   compile 'com.googlecode.juniversalchardet:juniversalchardet:1.0.3'
@@ -56,10 +58,11 @@ dependencies {
   compile "org.apache.poi:poi-ooxml-schemas:${versions.poi}"
   compile "commons-codec:commons-codec:${versions.commonscodec}"
   compile 'org.apache.xmlbeans:xmlbeans:2.6.0'
+  compile 'org.apache.commons:commons-collections4:4.1'
   // MS Office
   compile "org.apache.poi:poi-scratchpad:${versions.poi}"
   // Apple iWork
-  compile 'org.apache.commons:commons-compress:1.10'
+  compile 'org.apache.commons:commons-compress:1.14'
   // Outlook documents
   compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
   compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
@@ -317,8 +320,6 @@ thirdPartyAudit.excludes = [
   'com.microsoft.schemas.office.powerpoint.CTRel',
   'com.microsoft.schemas.office.visio.x2012.main.AttachedToolbarsType',
   'com.microsoft.schemas.office.visio.x2012.main.ColorsType',
-  'com.microsoft.schemas.office.visio.x2012.main.ConnectType',
-  'com.microsoft.schemas.office.visio.x2012.main.ConnectsType',
   'com.microsoft.schemas.office.visio.x2012.main.CpType',
   'com.microsoft.schemas.office.visio.x2012.main.CustomMenusFileType',
   'com.microsoft.schemas.office.visio.x2012.main.CustomToolbarsFileType',
@@ -346,6 +347,7 @@ thirdPartyAudit.excludes = [
   'com.microsoft.schemas.office.visio.x2012.main.TpType',
   'com.microsoft.schemas.office.visio.x2012.main.TriggerType',
   'com.microsoft.schemas.office.visio.x2012.main.impl.CellTypeImpl$1RefByList',
+  'com.microsoft.schemas.office.visio.x2012.main.impl.ConnectsTypeImpl$1ConnectList',
   'com.microsoft.schemas.office.visio.x2012.main.impl.MastersTypeImpl$1MasterList',
   'com.microsoft.schemas.office.visio.x2012.main.impl.MastersTypeImpl$1MasterShortcutList',
   'com.microsoft.schemas.office.visio.x2012.main.impl.PagesTypeImpl$1PageList',
@@ -481,6 +483,7 @@ thirdPartyAudit.excludes = [
   'com.pff.PSTFile',
   'com.pff.PSTFolder',
   'com.pff.PSTMessage',
+  'com.pff.PSTRecipient',
   'com.rometools.rome.feed.synd.SyndContent',
   'com.rometools.rome.feed.synd.SyndEntry',
   'com.rometools.rome.feed.synd.SyndFeed',
@@ -511,13 +514,14 @@ thirdPartyAudit.excludes = [
   'javax.servlet.ServletContextEvent',
   'javax.servlet.ServletContextListener',
   'javax.ws.rs.core.Response',
+  'javax.ws.rs.core.UriBuilder',
   'junit.framework.TestCase',
   'opennlp.tools.namefind.NameFinderME',
   'opennlp.tools.namefind.TokenNameFinderModel',
+  'opennlp.tools.sentiment.SentimentME',
+  'opennlp.tools.sentiment.SentimentModel',
   'opennlp.tools.util.Span',
   'org.apache.avalon.framework.logger.Logger',
-  'org.apache.commons.collections4.ListValuedMap',
-  'org.apache.commons.collections4.multimap.ArrayListValuedHashMap',
   'org.apache.commons.csv.CSVFormat',
   'org.apache.commons.csv.CSVParser',
   'org.apache.commons.csv.CSVRecord',
@@ -526,6 +530,7 @@ thirdPartyAudit.excludes = [
   'org.apache.commons.exec.ExecuteWatchdog',
   'org.apache.commons.exec.PumpStreamHandler',
   'org.apache.commons.exec.environment.EnvironmentUtils',
+  'org.apache.commons.lang.StringUtils',
   'org.apache.ctakes.typesystem.type.refsem.UmlsConcept',
   'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation',
   'org.apache.cxf.jaxrs.client.WebClient',
@@ -584,6 +589,7 @@ thirdPartyAudit.excludes = [
   'org.apache.xml.security.Init',
   'org.apache.xml.security.c14n.Canonicalizer',
   'org.apache.xml.security.utils.Base64',
+  'org.brotli.dec.BrotliInputStream',
   'org.etsi.uri.x01903.v13.AnyType',
   'org.etsi.uri.x01903.v13.ClaimedRolesListType',
   'org.etsi.uri.x01903.v13.CounterSignatureType',
@@ -625,12 +631,15 @@ thirdPartyAudit.excludes = [
   'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1RevocationValuesList',
   'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SigAndRefsTimeStampList',
   'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
+  'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
+  'org.etsi.uri.x01903.v14.ValidationDataType',
   'org.json.JSONArray',
   'org.json.JSONObject',
   'org.json.XML',
   'org.json.simple.JSONArray',
   'org.json.simple.JSONObject',
   'org.json.simple.parser.JSONParser',
+  'org.junit.Assert',
   'org.junit.Test',
   'org.junit.internal.TextListener',
   'org.junit.runner.JUnitCore',
@@ -801,6 +810,7 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.drawingml.x2006.main.CTSupplementalFont',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTableBackgroundStyle',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTableCellBorderStyle',
+  'org.openxmlformats.schemas.drawingml.x2006.main.CTTableStyleTextStyle',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTextBlipBullet',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTextBulletColorFollowText',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTextBulletSizeFollowText',
@@ -815,6 +825,7 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.drawingml.x2006.main.STBlackWhiteMode',
   'org.openxmlformats.schemas.drawingml.x2006.main.STBlipCompression',
   'org.openxmlformats.schemas.drawingml.x2006.main.STFixedAngle',
+  'org.openxmlformats.schemas.drawingml.x2006.main.STOnOffStyleType$Enum',
   'org.openxmlformats.schemas.drawingml.x2006.main.STPanose',
   'org.openxmlformats.schemas.drawingml.x2006.main.STPathFillMode',
   'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal',
@@ -1202,11 +1213,8 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMissing',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTNumber',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleLink',
-  'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObjects',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleSize',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPCDKPIs',
-  'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPageField',
-  'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPageFields',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPhoneticRun',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotFilters',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotHierarchies',
@@ -1231,9 +1239,11 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTX',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellSpans',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STDataValidationImeMode',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.STDvAspect',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STFieldSortType',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STGuid',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STObjects',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.STOleUpdate',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STPhoneticAlignment',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STPhoneticType',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.STPrintError',
@@ -1291,7 +1301,9 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTMapInfoImpl$1SchemaList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTMergeCellsImpl$1MergeCellList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTNumFmtsImpl$1NumFmtList',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTOleObjectsImpl$1OleObjectList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPageBreakImpl$1BrkList',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPageFieldsImpl$1PageFieldList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPivotCacheRecordsImpl$1RList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPivotCachesImpl$1PivotCacheList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPivotFieldsImpl$1PivotFieldList',
@@ -1321,6 +1333,7 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSharedItemsImpl$1NList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSharedItemsImpl$1SList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetDataImpl$1RowList',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetsImpl$1SheetList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewImpl$1PivotSelectionList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewImpl$1SelectionList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewsImpl$1SheetViewList',
@@ -2035,26 +2048,10 @@ thirdPartyAudit.excludes = [
   'org.osgi.framework.ServiceRegistration',
   'org.osgi.util.tracker.ServiceTracker',
   'org.osgi.util.tracker.ServiceTrackerCustomizer',
-  'org.slf4j.Logger',
-  'org.slf4j.LoggerFactory',
+  'org.slf4j.impl.StaticLoggerBinder',
+  'org.slf4j.impl.StaticMDCBinder',
+  'org.slf4j.impl.StaticMarkerBinder',
   'org.sqlite.SQLiteConfig',
-  'org.tukaani.xz.ARMOptions',
-  'org.tukaani.xz.ARMThumbOptions',
-  'org.tukaani.xz.DeltaOptions',
-  'org.tukaani.xz.FilterOptions',
-  'org.tukaani.xz.FinishableWrapperOutputStream',
-  'org.tukaani.xz.IA64Options',
-  'org.tukaani.xz.LZMA2InputStream',
-  'org.tukaani.xz.LZMA2Options',
-  'org.tukaani.xz.LZMAInputStream',
-  'org.tukaani.xz.PowerPCOptions',
-  'org.tukaani.xz.SPARCOptions',
-  'org.tukaani.xz.SingleXZInputStream',
-  'org.tukaani.xz.UnsupportedOptionsException',
-  'org.tukaani.xz.X86Options',
-  'org.tukaani.xz.XZ',
-  'org.tukaani.xz.XZInputStream',
-  'org.tukaani.xz.XZOutputStream',
   'org.w3.x2000.x09.xmldsig.KeyInfoType',
   'org.w3.x2000.x09.xmldsig.SignatureMethodType',
   'org.w3.x2000.x09.xmldsig.SignatureValueType',

+ 1 - 0
plugins/ingest-attachment/licenses/commons-collections4-4.1.jar.sha1

@@ -0,0 +1 @@
+a4cf4688fe1c7e3a63aa636cc96d013af537768e

+ 201 - 0
plugins/ingest-attachment/licenses/commons-collections4-LICENSE.txt

@@ -0,0 +1,201 @@
+      Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 5 - 0
plugins/ingest-attachment/licenses/commons-collections4-NOTICE.txt

@@ -0,0 +1,5 @@
+Apache Commons Collections
+Copyright 2001-2015 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

+ 0 - 1
plugins/ingest-attachment/licenses/commons-compress-1.10.jar.sha1

@@ -1 +0,0 @@
-5eeb27c57eece1faf2d837868aeccc94d84dcc9a

+ 1 - 0
plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1

@@ -0,0 +1 @@
+7b18320d668ab080758bf5383d6d8fcf750babce

+ 0 - 1
plugins/ingest-attachment/licenses/poi-3.15.jar.sha1

@@ -1 +0,0 @@
-965bba8899988008bb2341e300347de62aad5391

+ 1 - 0
plugins/ingest-attachment/licenses/poi-3.16.jar.sha1

@@ -0,0 +1 @@
+ad21c123ee5d6b5b2a8f0d4ed23b3ffe6759a889

+ 0 - 1
plugins/ingest-attachment/licenses/poi-ooxml-3.15.jar.sha1

@@ -1 +0,0 @@
-e2800856735b07b8edd417aee07685470216a00f

+ 1 - 0
plugins/ingest-attachment/licenses/poi-ooxml-3.16.jar.sha1

@@ -0,0 +1 @@
+76e20fe22404cc4da55ddfdaaaadee32bbfa3bdd

+ 0 - 1
plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.15.jar.sha1

@@ -1 +0,0 @@
-de4a50ca39de48a19606b35644ecadb2f733c479

+ 1 - 0
plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.16.jar.sha1

@@ -0,0 +1 @@
+9828a49307fc6bebfd42185b677d88b6e4994c63

+ 0 - 1
plugins/ingest-attachment/licenses/poi-scratchpad-3.15.jar.sha1

@@ -1 +0,0 @@
-f1db76ae4a9389fa4339dc3b7f8208aa82c72b04

+ 1 - 0
plugins/ingest-attachment/licenses/poi-scratchpad-3.16.jar.sha1

@@ -0,0 +1 @@
+69d6dda524e38a491b362d0f94ef74a514faf70a

+ 1 - 0
plugins/ingest-attachment/licenses/slf4j-api-1.6.2.jar.sha1

@@ -0,0 +1 @@
+8619e95939167fb37245b5670135e4feb0ec7d50

+ 21 - 0
plugins/ingest-attachment/licenses/slf4j-api-LICENSE.txt

@@ -0,0 +1,21 @@
+Copyright (c) 2004-2014 QOS.ch
+All rights reserved.
+
+Permission is hereby granted, free  of charge, to any person obtaining
+a  copy  of this  software  and  associated  documentation files  (the
+"Software"), to  deal in  the Software without  restriction, including
+without limitation  the rights to  use, copy, modify,  merge, publish,
+distribute,  sublicense, and/or sell  copies of  the Software,  and to
+permit persons to whom the Software  is furnished to do so, subject to
+the following conditions:
+
+The  above  copyright  notice  and  this permission  notice  shall  be
+included in all copies or substantial portions of the Software.
+
+THE  SOFTWARE IS  PROVIDED  "AS  IS", WITHOUT  WARRANTY  OF ANY  KIND,
+EXPRESS OR  IMPLIED, INCLUDING  BUT NOT LIMITED  TO THE  WARRANTIES OF
+MERCHANTABILITY,    FITNESS    FOR    A   PARTICULAR    PURPOSE    AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE,  ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ 0 - 0
plugins/ingest-attachment/licenses/slf4j-api-NOTICE.txt


+ 0 - 1
plugins/ingest-attachment/licenses/tika-core-1.14.jar.sha1

@@ -1 +0,0 @@
-afff8f1774994aa973ef90bc8d38ddf089b9d6d9

+ 1 - 0
plugins/ingest-attachment/licenses/tika-core-1.15.jar.sha1

@@ -0,0 +1 @@
+17850c2224e4e3867e588060dc8ce6ba3bcfab2a

+ 0 - 1
plugins/ingest-attachment/licenses/tika-parsers-1.14.jar.sha1

@@ -1 +0,0 @@
-d26c10a9e7d116366562aa260013a30a55ff4e8f

+ 1 - 0
plugins/ingest-attachment/licenses/tika-parsers-1.15.jar.sha1

@@ -0,0 +1 @@
+aa07c2cda051709e5fe70fd6e244386fc93b0a1e

+ 1 - 0
plugins/ingest-attachment/licenses/xz-1.6.jar.sha1

@@ -0,0 +1 @@
+05b6f921f1810bdf90e25471968f741f87168b64

+ 9 - 0
plugins/ingest-attachment/licenses/xz-LICENSE.txt

@@ -0,0 +1,9 @@
+
+Licensing of XZ for Java
+========================
+
+    All the files in this package have been written by Lasse Collin
+    and/or Igor Pavlov. All these files have been put into the
+    public domain. You can do whatever you want with these files.
+
+    This software is provided "as is", without any warranty.

+ 1 - 0
plugins/ingest-attachment/licenses/xz-NOTICE.txt

@@ -0,0 +1 @@
+

+ 11 - 3
plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java

@@ -50,10 +50,10 @@ import java.security.PrivilegedExceptionAction;
 import java.security.ProtectionDomain;
 import java.security.SecurityPermission;
 import java.util.Arrays;
-import java.util.Collections;
+import java.util.HashSet;
 import java.util.LinkedHashSet;
-import java.util.PropertyPermission;
 import java.util.Set;
+import java.util.PropertyPermission;
 
 /**
  * Runs tika with limited parsers and limited permissions.
@@ -63,7 +63,15 @@ import java.util.Set;
 final class TikaImpl {
 
     /** Exclude some formats */
-    private static final Set<MediaType> EXCLUDES = Collections.singleton(MediaType.application("x-tika-ooxml"));
+    private static final Set<MediaType> EXCLUDES = new HashSet<>(Arrays.asList(
+        MediaType.application("vnd.ms-visio.drawing"),
+        MediaType.application("vnd.ms-visio.drawing.macroenabled.12"),
+        MediaType.application("vnd.ms-visio.stencil"),
+        MediaType.application("vnd.ms-visio.stencil.macroenabled.12"),
+        MediaType.application("vnd.ms-visio.template"),
+        MediaType.application("vnd.ms-visio.template.macroenabled.12"),
+        MediaType.application("vnd.ms-visio.drawing")
+    ));
 
     /** subset of parsers for types we support */
     private static final Parser PARSERS[] = new Parser[] {