|
@@ -47,6 +47,7 @@ import static org.hamcrest.Matchers.hasSize;
|
|
|
import static org.hamcrest.Matchers.is;
|
|
|
import static org.hamcrest.Matchers.not;
|
|
|
import static org.hamcrest.Matchers.notNullValue;
|
|
|
+import static org.hamcrest.Matchers.nullValue;
|
|
|
import static org.hamcrest.core.IsCollectionContaining.hasItem;
|
|
|
|
|
|
public class AttachmentProcessorTests extends ESTestCase {
|
|
@@ -130,6 +131,34 @@ public class AttachmentProcessorTests extends ESTestCase {
|
|
|
is("application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
|
|
|
}
|
|
|
|
|
|
+ public void testWordDocumentWithVisioSchema() throws Exception { // Parses the "issue-22077.docx" fixture and verifies each extracted attachment field.
|
|
|
+ Map<String, Object> attachmentData = parseDocument("issue-22077.docx", processor); // NOTE(review): parseDocument is defined outside this view; presumably it runs the fixture through the processor and returns the extracted fields - confirm
|
|
|
+
|
|
|
+ assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type",
|
|
|
+ "content_length")); // the key set must be exactly these six fields, in any order
|
|
|
+ assertThat(attachmentData.get("content").toString(), containsString("Table of Contents")); // substring check only, not the full text
|
|
|
+ assertThat(attachmentData.get("language"), is("en"));
|
|
|
+ assertThat(attachmentData.get("date"), is("2015-01-06T18:07:00Z")); // exact date string expected for this fixture
|
|
|
+ assertThat(attachmentData.get("author"), is(notNullValue())); // only presence is checked, not the value
|
|
|
+ assertThat(attachmentData.get("content_length"), is(notNullValue())); // only presence is checked, not the value
|
|
|
+ assertThat(attachmentData.get("content_type").toString(),
|
|
|
+ is("application/vnd.openxmlformats-officedocument.wordprocessingml.document")); // content type expected for the .docx fixture
|
|
|
+ }
|
|
|
+
|
|
|
+ public void testLegacyWordDocumentWithVisioSchema() throws Exception { // Parses the "issue-22077.doc" fixture and verifies each extracted attachment field.
|
|
|
+ Map<String, Object> attachmentData = parseDocument("issue-22077.doc", processor); // NOTE(review): parseDocument is defined outside this view; presumably it runs the fixture through the processor and returns the extracted fields - confirm
|
|
|
+
|
|
|
+ assertThat(attachmentData.keySet(), containsInAnyOrder("content", "language", "date", "author", "content_type",
|
|
|
+ "content_length")); // the key set must be exactly these six fields, in any order
|
|
|
+ assertThat(attachmentData.get("content").toString(), containsString("Table of Contents")); // substring check only, not the full text
|
|
|
+ assertThat(attachmentData.get("language"), is("en"));
|
|
|
+ assertThat(attachmentData.get("date"), is("2016-12-16T15:04:00Z")); // exact date string expected for this fixture
|
|
|
+ assertThat(attachmentData.get("author"), is(notNullValue())); // only presence is checked, not the value
|
|
|
+ assertThat(attachmentData.get("content_length"), is(notNullValue())); // only presence is checked, not the value
|
|
|
+ assertThat(attachmentData.get("content_type").toString(),
|
|
|
+ is("application/msword")); // content type expected for the legacy .doc fixture
|
|
|
+ }
|
|
|
+
|
|
|
public void testPdf() throws Exception {
|
|
|
Map<String, Object> attachmentData = parseDocument("test.pdf", processor);
|
|
|
assertThat(attachmentData.get("content"),
|
|
@@ -138,6 +167,13 @@ public class AttachmentProcessorTests extends ESTestCase {
|
|
|
assertThat(attachmentData.get("content_length"), is(notNullValue()));
|
|
|
}
|
|
|
|
|
|
+ public void testVisioIsExcluded() throws Exception { // Parses the "issue-22077.vsdx" fixture and verifies that no content is extracted from it.
|
|
|
+ Map<String, Object> attachmentData = parseDocument("issue-22077.vsdx", processor); // NOTE(review): parseDocument is defined outside this view; presumably it runs the fixture through the processor and returns the extracted fields - confirm
|
|
|
+ assertThat(attachmentData.get("content"), nullValue()); // no "content" value is expected for the Visio fixture
|
|
|
+ assertThat(attachmentData.get("content_type"), is("application/vnd.ms-visio.drawing")); // the content type is still expected to be reported
|
|
|
+ assertThat(attachmentData.get("content_length"), is(0L)); // Long literal: is() matches by equality, so the stored value must be a Long zero
|
|
|
+ }
|
|
|
+
|
|
|
public void testEncryptedPdf() throws Exception {
|
|
|
ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> parseDocument("encrypted.pdf", processor));
|
|
|
assertThat(e.getDetailedMessage(), containsString("document is encrypted"));
|