Browse Source

Deps: Update ingest-attachment to latest libraries (#20710)

Also added a test that parses a regular PDF,
instead of only an encrypted one with an expected exception.
Alexander Reelsen 9 năm trước cách đây
mục cha
commit
3c2e51d831
21 tập tin đã thay đổi với 60 bổ sung và 26 xóa
  1. 39 11
      plugins/ingest-attachment/build.gradle
  2. 0 1
      plugins/ingest-attachment/licenses/bcmail-jdk15on-1.54.jar.sha1
  3. 1 0
      plugins/ingest-attachment/licenses/bcmail-jdk15on-1.55.jar.sha1
  4. 0 1
      plugins/ingest-attachment/licenses/bcpkix-jdk15on-1.54.jar.sha1
  5. 1 0
      plugins/ingest-attachment/licenses/bcpkix-jdk15on-1.55.jar.sha1
  6. 0 1
      plugins/ingest-attachment/licenses/bcprov-jdk15on-1.54.jar.sha1
  7. 1 0
      plugins/ingest-attachment/licenses/bcprov-jdk15on-1.55.jar.sha1
  8. 0 1
      plugins/ingest-attachment/licenses/fontbox-2.0.1.jar.sha1
  9. 1 0
      plugins/ingest-attachment/licenses/fontbox-2.0.3.jar.sha1
  10. 0 1
      plugins/ingest-attachment/licenses/pdfbox-2.0.1.jar.sha1
  11. 1 0
      plugins/ingest-attachment/licenses/pdfbox-2.0.3.jar.sha1
  12. 0 1
      plugins/ingest-attachment/licenses/poi-3.15-beta1.jar.sha1
  13. 1 0
      plugins/ingest-attachment/licenses/poi-3.15.jar.sha1
  14. 0 1
      plugins/ingest-attachment/licenses/poi-ooxml-3.15-beta1.jar.sha1
  15. 1 0
      plugins/ingest-attachment/licenses/poi-ooxml-3.15.jar.sha1
  16. 0 1
      plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.15-beta1.jar.sha1
  17. 1 0
      plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.15.jar.sha1
  18. 0 1
      plugins/ingest-attachment/licenses/poi-scratchpad-3.15-beta1.jar.sha1
  19. 1 0
      plugins/ingest-attachment/licenses/poi-scratchpad-3.15.jar.sha1
  20. 12 6
      plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java
  21. BIN
      plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/sample-files/test.pdf

+ 39 - 11
plugins/ingest-attachment/build.gradle

@@ -24,9 +24,9 @@ esplugin {
 
 versions << [
   'tika': '1.13',
-  'pdfbox': '2.0.1',
-  'bouncycastle': '1.54',
-  'poi': '3.15-beta1'
+  'pdfbox': '2.0.3',
+  'bouncycastle': '1.55',
+  'poi': '3.15'
 ]
 
 dependencies {
@@ -216,7 +216,6 @@ thirdPartyAudit.excludes = [
   'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$1VTEditList',
   'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$1ValList',
   'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$1ValidIdsList',
-  'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$1VisibleList',
   'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$1WidthMinList',
   'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$2Accel2List',
   'com.microsoft.schemas.office.excel.impl.CTClientDataImpl$2AccelList',
@@ -506,6 +505,8 @@ thirdPartyAudit.excludes = [
   'opennlp.tools.namefind.TokenNameFinderModel',
   'opennlp.tools.util.Span',
   'org.apache.avalon.framework.logger.Logger',
+  'org.apache.commons.collections4.ListValuedMap',
+  'org.apache.commons.collections4.multimap.ArrayListValuedHashMap',
   'org.apache.commons.csv.CSVFormat',
   'org.apache.commons.csv.CSVParser',
   'org.apache.commons.csv.CSVRecord',
@@ -630,8 +631,6 @@ thirdPartyAudit.excludes = [
   'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1RevocationValuesList',
   'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SigAndRefsTimeStampList',
   'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
-  'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
-  'org.etsi.uri.x01903.v14.ValidationDataType',
   'org.json.JSONArray',
   'org.json.JSONObject',
   'org.json.XML',
@@ -785,17 +784,16 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.drawingml.x2006.main.CTGlowEffect',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTGrayscaleEffect',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTGrayscaleTransform',
-  'org.openxmlformats.schemas.drawingml.x2006.main.CTGroupFillProperties',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTGroupLocking',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTHSLEffect',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTInnerShadowEffect',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTInverseGammaTransform',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTInverseTransform',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTLineJoinBevel',
+  'org.openxmlformats.schemas.drawingml.x2006.main.CTLineJoinMiterProperties',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTLuminanceEffect',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTObjectStyleDefaults',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTPath2DArcTo',
-  'org.openxmlformats.schemas.drawingml.x2006.main.CTPatternFillProperties',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTPolarAdjustHandle',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveFixedAngle',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTPresetShadowEffect',
@@ -808,7 +806,7 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.drawingml.x2006.main.CTSoftEdgesEffect',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTSupplementalFont',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTableBackgroundStyle',
-  'org.openxmlformats.schemas.drawingml.x2006.main.CTTablePartStyle',
+  'org.openxmlformats.schemas.drawingml.x2006.main.CTTableCellBorderStyle',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTextBlipBullet',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTextBulletColorFollowText',
   'org.openxmlformats.schemas.drawingml.x2006.main.CTTextBulletSizeFollowText',
@@ -823,9 +821,10 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.drawingml.x2006.main.STBlackWhiteMode',
   'org.openxmlformats.schemas.drawingml.x2006.main.STBlipCompression',
   'org.openxmlformats.schemas.drawingml.x2006.main.STFixedAngle',
-  'org.openxmlformats.schemas.drawingml.x2006.main.STGuid',
   'org.openxmlformats.schemas.drawingml.x2006.main.STPanose',
   'org.openxmlformats.schemas.drawingml.x2006.main.STPathFillMode',
+  'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal',
+  'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal$Enum',
   'org.openxmlformats.schemas.drawingml.x2006.main.STRectAlignment',
   'org.openxmlformats.schemas.drawingml.x2006.main.STTextColumnCount',
   'org.openxmlformats.schemas.drawingml.x2006.main.STTextNonNegativePoint',
@@ -933,6 +932,34 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1SatOffList',
   'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1ShadeList',
   'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1TintList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1CompList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GammaList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GrayList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1InvGammaList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1InvList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatModList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatOffList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1ShadeList',
+  'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1TintList',
   'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaList',
   'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaModList',
   'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaOffList',
@@ -1184,6 +1211,8 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObjects',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleSize',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPCDKPIs',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPageField',
+  'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPageFields',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPhoneticRun',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotFilters',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotHierarchies',
@@ -1269,7 +1298,6 @@ thirdPartyAudit.excludes = [
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTMergeCellsImpl$1MergeCellList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTNumFmtsImpl$1NumFmtList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPageBreakImpl$1BrkList',
-  'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPageFieldsImpl$1PageFieldList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPivotCacheRecordsImpl$1RList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPivotCachesImpl$1PivotCacheList',
   'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTPivotFieldsImpl$1PivotFieldList',

+ 0 - 1
plugins/ingest-attachment/licenses/bcmail-jdk15on-1.54.jar.sha1

@@ -1 +0,0 @@
-9d9b5432b4b29ef4a853223bc6e19379ef116cca

+ 1 - 0
plugins/ingest-attachment/licenses/bcmail-jdk15on-1.55.jar.sha1

@@ -0,0 +1 @@
+5cea2dada69b98698ea975a5c1dd3c91ac8ffbb6

+ 0 - 1
plugins/ingest-attachment/licenses/bcpkix-jdk15on-1.54.jar.sha1

@@ -1 +0,0 @@
-b11bfee99bb11eea344de6e4a07fe89212c55c02

+ 1 - 0
plugins/ingest-attachment/licenses/bcpkix-jdk15on-1.55.jar.sha1

@@ -0,0 +1 @@
+6392d8cba22b722c6570d660ca0b3921ff1bae4f

+ 0 - 1
plugins/ingest-attachment/licenses/bcprov-jdk15on-1.54.jar.sha1

@@ -1 +0,0 @@
-1acdedeb89f1d950d67b73d481eb7736df65eedb

+ 1 - 0
plugins/ingest-attachment/licenses/bcprov-jdk15on-1.55.jar.sha1

@@ -0,0 +1 @@
+935f2e57a00ec2c489cbd2ad830d4a399708f979

+ 0 - 1
plugins/ingest-attachment/licenses/fontbox-2.0.1.jar.sha1

@@ -1 +0,0 @@
-b9d4f0993e015f3f1ce0be9e7300cf62dd7a7f15

+ 1 - 0
plugins/ingest-attachment/licenses/fontbox-2.0.3.jar.sha1

@@ -0,0 +1 @@
+448ee588d0136121cf5c4dd397384cccb9db1ad7

+ 0 - 1
plugins/ingest-attachment/licenses/pdfbox-2.0.1.jar.sha1

@@ -1 +0,0 @@
-dbc69649118b7eff278f228c070a40ee559e1f62

+ 1 - 0
plugins/ingest-attachment/licenses/pdfbox-2.0.3.jar.sha1

@@ -0,0 +1 @@
+be7b09de93f7c7795c57f4fbf14db60ab93806b4

+ 0 - 1
plugins/ingest-attachment/licenses/poi-3.15-beta1.jar.sha1

@@ -1 +0,0 @@
-048bb8326b81323631d9ceb4236cfbd382e56da2

+ 1 - 0
plugins/ingest-attachment/licenses/poi-3.15.jar.sha1

@@ -0,0 +1 @@
+965bba8899988008bb2341e300347de62aad5391

+ 0 - 1
plugins/ingest-attachment/licenses/poi-ooxml-3.15-beta1.jar.sha1

@@ -1 +0,0 @@
-81085a47fdf0d74d473d605c6b3784e26731842e

+ 1 - 0
plugins/ingest-attachment/licenses/poi-ooxml-3.15.jar.sha1

@@ -0,0 +1 @@
+e2800856735b07b8edd417aee07685470216a00f

+ 0 - 1
plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.15-beta1.jar.sha1

@@ -1 +0,0 @@
-f8bc979ad79908a99483337f1ca2edf78558ac20

+ 1 - 0
plugins/ingest-attachment/licenses/poi-ooxml-schemas-3.15.jar.sha1

@@ -0,0 +1 @@
+de4a50ca39de48a19606b35644ecadb2f733c479

+ 0 - 1
plugins/ingest-attachment/licenses/poi-scratchpad-3.15-beta1.jar.sha1

@@ -1 +0,0 @@
-f4e276aaf97a60a1156388c9e38069122b7ea914

+ 1 - 0
plugins/ingest-attachment/licenses/poi-scratchpad-3.15.jar.sha1

@@ -0,0 +1 @@
+f1db76ae4a9389fa4339dc3b7f8208aa82c72b04

+ 12 - 6
plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java

@@ -21,8 +21,8 @@ package org.elasticsearch.ingest.attachment;
 
 import org.apache.commons.io.IOUtils;
 import org.elasticsearch.ElasticsearchParseException;
-import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.ingest.IngestDocument;
+import org.elasticsearch.ingest.RandomDocumentPicks;
 import org.elasticsearch.test.ESTestCase;
 import org.junit.Before;
 
@@ -38,6 +38,7 @@ import java.util.Set;
 
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
@@ -125,12 +126,17 @@ public class AttachmentProcessorTests extends ESTestCase {
             is("application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
     }
 
+    public void testPdf() throws Exception {
+        Map<String, Object> attachmentData = parseDocument("test.pdf", processor);
+        assertThat(attachmentData.get("content"),
+                is("This is a test, with umlauts, from München\n\nAlso contains newlines for testing.\n\nAnd one more."));
+        assertThat(attachmentData.get("content_type").toString(), is("application/pdf"));
+        assertThat(attachmentData.get("content_length"), is(notNullValue()));
+    }
+
     public void testEncryptedPdf() throws Exception {
-        try {
-            parseDocument("encrypted.pdf", processor);
-        } catch (ElasticsearchParseException e) {
-            assertThat(e.getDetailedMessage(), containsString("document is encrypted"));
-        }
+        ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> parseDocument("encrypted.pdf", processor));
+        assertThat(e.getDetailedMessage(), containsString("document is encrypted"));
     }
 
     public void testHtmlDocument() throws Exception {

BIN
plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/sample-files/test.pdf