Browse Source

improving timeouts

Jhonny Mertz 1 year ago
parent
commit
5fc09374ae

+ 20 - 4
README.md

@@ -16,7 +16,7 @@ If you are using Gradle/Maven, see example below:
 In your `build.gradle`:
 ```groovy
 dependencies {
-    compile 'com.github.jhonnymertz:java-wkhtmltopdf-wrapper:1.2.0-RELEASE'
+    compile 'com.github.jhonnymertz:java-wkhtmltopdf-wrapper:1.3.0-RELEASE'
 }
 ```
 
@@ -26,7 +26,7 @@ In your `pom.xml`:
 <dependency>
     <groupId>com.github.jhonnymertz</groupId>
     <artifactId>java-wkhtmltopdf-wrapper</artifactId>
-    <version>1.2.0-RELEASE</version>
+    <version>1.3.0-RELEASE</version>
 </dependency>
 ```
 
@@ -163,6 +163,19 @@ pdf.setSuccessValues(Arrays.asList(0, 1));
 pdf.saveAs("output.pdf");
 ```
 
+### Timeouts
+
+There are often situations in which the wkhtmltopdf may take longer to generate the file. By default, the library uses a 10 seconds timeout for all the internal processes depending on wkhtmltopdf. However, in some situation 10s may not be sufficient, thus you can customize this value:
+
+```java
+Pdf pdf = new Pdf();
+pdf.addPageFromUrl("http://www.google.com");
+
+pdf.setTimeout(30); // 30 seconds timeout
+
+pdf.saveAs("output.pdf");
+```
+
 ### Cleaning up temporary files
 
 After the PDF generation, the library automatically cleans up the temporary files created. However, there may be situations in which the `Pdf` object is created but no PDF is generated. To avoid increasing the temp folder size and having problems, you can force the deletion of all temporary files created by the library by:
@@ -180,8 +193,11 @@ Known issues
 ------------
 
 **Output of wkhtmltopdf is being added to resulting pdf** ([Issue #19](https://github.com/jhonnymertz/java-wkhtmltopdf-wrapper/issues/19))
-- Starting from 1.1.10-RELEASE version, there is a method `saveAsDirect(String path)`, which executes wkhtmltopdf passing the `path` as output for wkhtmltopdf, instead of the standard input `-`. This saves the results directly to the specified file `path`.
-
+- Starting from 1.1.10-RELEASE version, there is a method `saveAsDirect(String path)`, which executes wkhtmltopdf passing the `path` as output for wkhtmltopdf, instead of the standard input `-`. This saves the results directly to the specified file `path` without handling it with internal stdout.
+**Processes taking longer due to some wkhtmltopdf parameters, such as `window.status`, possibly resulting in timeouts** ([Pull #131](https://github.com/jhonnymertz/java-wkhtmltopdf-wrapper/pull/131))
+- Some parameters may cause wkhtmltopdf think that the page has not loaded and it will wait for longer, causing some internal processes of the library to be blocked. To avoid blocking the internal processes of the library, the given timeout (default 10s) was forcibly set to all the internal processes. This value can be increased by using `setTimeout(int seconds)`. Please refer to the [Timeouts](#timeouts) section for more information.
+- This issue is often caused by developers unfamiliar with wkhtmltopdf features and not related to the library itself. If you are experiencing something related, check the command being generated and the parameters you are using. Try to remove them one by one to find the one causing the issue. 
+  
 **Because this library relies on `wkhtmltopdf`, it does not support concurrent PDF generations.**
 
 License

+ 1 - 1
pom.xml

@@ -3,7 +3,7 @@
     <modelVersion>4.0.0</modelVersion>
     <groupId>com.github.jhonnymertz</groupId>
     <artifactId>java-wkhtmltopdf-wrapper</artifactId>
-    <version>1.2.0-RELEASE</version>
+    <version>1.3.0-RELEASE</version>
     <packaging>jar</packaging>
 
     <name>Java WkHtmlToPdf Wrapper</name>

+ 32 - 15
src/main/java/com/github/jhonnymertz/wkhtmltopdf/wrapper/Pdf.java

@@ -3,7 +3,13 @@ package com.github.jhonnymertz.wkhtmltopdf.wrapper;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.configurations.FilenameFilterConfig;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.configurations.WrapperConfig;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.exceptions.PDFExportException;
-import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.*;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.exceptions.PDFGenerationException;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.exceptions.PDFTimeoutException;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.BaseObject;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.Cover;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.Page;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.SourceType;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.TableOfContents;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.params.Param;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.params.Params;
 import org.apache.commons.io.FileUtils;
@@ -22,7 +28,12 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.*;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.stream.Collectors;
 
 /**
@@ -52,7 +63,7 @@ public class Pdf {
     /**
      * The constant TEMPORARY_FILE_PREFIX.
      */
-    public static String TEMPORARY_FILE_PREFIX = "java-wkhtmltopdf-wrapper";
+    public static final String TEMPORARY_FILE_PREFIX = "java-wkhtmltopdf-wrapper";
 
     private String outputFilename = null;
 
@@ -315,7 +326,7 @@ public class Pdf {
             if (!process.waitFor(this.timeout, TimeUnit.SECONDS)) {
                 process.destroy();
                 logger.error("PDF generation failed by defined timeout of {}s, command: {}", timeout, command);
-                throw new RuntimeException("PDF generation timeout by user. Try to increase the timeout.");
+                throw new PDFTimeoutException(command, this.timeout, getFuture(inputStreamToByteArray));
             }
 
             if (!successValues.contains(process.exitValue())) {
@@ -359,7 +370,7 @@ public class Pdf {
         // Check if TOC is always first
         if (wrapperConfig.getAlwaysPutTocFirst()) {
             // remove and add TOC to top
-            List<BaseObject> tocObjects = objects.stream().filter((o) -> o instanceof TableOfContents).collect(Collectors.toList()); // .getObjectIdentifier().equalsIgnoreCase("toc")
+            List<BaseObject> tocObjects = objects.stream().filter(TableOfContents.class::isInstance).collect(Collectors.toList());
             objects.removeAll(tocObjects);
             objects.addAll(0, tocObjects);
         }
@@ -374,24 +385,30 @@ public class Pdf {
         return commandLine.toArray(new String[commandLine.size()]);
     }
 
+    /**
+     * Converts an InputStream to a byte array
+     *
+     * @param input the input stream
+     * @return the byte array
+     */
     private Callable<byte[]> streamToByteArrayTask(final InputStream input) {
-        return new Callable<byte[]>() {
-            public byte[] call() throws Exception {
-                return IOUtils.toByteArray(input);
-            }
-        };
+        return () -> IOUtils.toByteArray(input);
     }
 
     private byte[] getFuture(Future<byte[]> future) {
         try {
             return future.get(this.timeout, TimeUnit.SECONDS);
+        } catch (final TimeoutException e) {
+            logger.error("PDF generation failed by defined timeout of {}s", timeout);
+            throw new PDFTimeoutException(this.timeout, e);
         } catch (final Exception e) {
-            throw new RuntimeException(e);
+            logger.error("Something went wrong while generating PDF.", e);
+            throw new PDFGenerationException(e);
         }
     }
 
     /**
-     * CLeans up temporary files used to generate the wkhtmltopdf command
+     * Cleans up temporary files used to generate the wkhtmltopdf command
      */
     private void cleanTempFiles() {
         logger.debug("Cleaning up temporary files...");
@@ -401,9 +418,9 @@ public class Pdf {
                 if (page.getType().equals(SourceType.htmlAsString)) {
                     try {
                         Path p = Paths.get(page.getFilePath());
-                        logger.debug("Delete temp file at: " + page.getFilePath() + " " + Files.deleteIfExists(p));
+                        logger.debug("Delete temp file at: '{}' Status: '{}'", page.getFilePath(), Files.deleteIfExists(p));
                     } catch (IOException ex) {
-                        logger.warn("Couldn't delete temp file " + page.getFilePath());
+                        logger.warn("Couldn't delete temp file '{}'", page.getFilePath(), ex);
                     }
                 }
             }
@@ -419,7 +436,7 @@ public class Pdf {
         final File[] files = folder.listFiles(new FilenameFilterConfig());
         for (final File file : files) {
             if (!file.delete()) {
-                logger.warn("Couldn't delete temp file " + file.getAbsolutePath());
+                logger.warn("Couldn't delete temp file '{}'", file.getAbsolutePath());
             }
         }
         logger.debug("{} temporary files removed.", files.length);

+ 16 - 0
src/main/java/com/github/jhonnymertz/wkhtmltopdf/wrapper/exceptions/PDFGenerationException.java

@@ -0,0 +1,16 @@
+package com.github.jhonnymertz.wkhtmltopdf.wrapper.exceptions;
+
+/**
+ * Exception to describe and track pdf generation process errors
+ */
+public class PDFGenerationException extends RuntimeException {
+
+    /**
+     * Instantiates a new Pdf timeout exception when future process errors happens
+     *
+     * @param e the original exception
+     */
+    public PDFGenerationException(final Exception e) {
+        super(e);
+    }
+}

+ 84 - 0
src/main/java/com/github/jhonnymertz/wkhtmltopdf/wrapper/exceptions/PDFTimeoutException.java

@@ -0,0 +1,84 @@
+package com.github.jhonnymertz.wkhtmltopdf.wrapper.exceptions;
+
+/**
+ * Exception to describe and track pdf exporting errors due to timeout
+ */
+public class PDFTimeoutException extends RuntimeException {
+
+    /**
+     * The PDF command which originated the exception
+     */
+    private String command;
+
+    /**
+     * The default exit status code for timeout of the PDF command
+     */
+    private int timeout;
+
+    /**
+     * The output of the PDF command
+     */
+    private byte[] out;
+
+    /**
+     * Instantiates a new Pdf timeout exception when wkhtmltopdf timeout happens
+     *
+     * @param command the command
+     * @param timeout the timeout
+     * @param out     the out
+     */
+    public PDFTimeoutException(final String command, final int timeout, final byte[] out) {
+        super(String.format("Process '%s' timeout after %d seconds. " +
+                "Try to increase the timeout via the 'PDF.setTimeout()' method.", command, timeout));
+        this.command = command;
+        this.timeout = timeout;
+        this.out = out;
+    }
+
+    /**
+     * Instantiates a new Pdf timeout exception when future process timeout happens
+     *
+     * @param timeout the timeout
+     * @param e       the exception
+     */
+    public PDFTimeoutException(final int timeout, final Exception e) {
+        super(String.format("Process timeout after %s seconds. " +
+                "Try to increase the timeout via the 'PDF.setTimeout()' method.", timeout), e);
+    }
+
+    /**
+     * Gets command.
+     *
+     * @return the command
+     */
+    public String getCommand() {
+        return command;
+    }
+
+    /**
+     * Gets exit status.
+     *
+     * @return the exit status
+     */
+    public int getExitStatus() {
+        return 124;
+    }
+
+    /**
+     * Get out byte [ ].
+     *
+     * @return the byte [ ]
+     */
+    public byte[] getOut() {
+        return out;
+    }
+
+    /**
+     * Get timeout.
+     *
+     * @return the timeout
+     */
+    public int getTimeout() {
+        return timeout;
+    }
+}

+ 26 - 12
src/test/java/com/github/jhonnymertz/wkhtmltopdf/wrapper/integration/PdfIntegrationTests.java

@@ -3,6 +3,7 @@ package com.github.jhonnymertz.wkhtmltopdf.wrapper.integration;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.Pdf;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.configurations.WrapperConfig;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.configurations.XvfbConfig;
+import com.github.jhonnymertz.wkhtmltopdf.wrapper.exceptions.PDFTimeoutException;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.Page;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.objects.TableOfContents;
 import com.github.jhonnymertz.wkhtmltopdf.wrapper.params.Param;
@@ -22,6 +23,7 @@ import static org.hamcrest.core.IsNot.not;
 import static org.hamcrest.core.StringContains.containsString;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
@@ -249,24 +251,36 @@ class PdfIntegrationTests {
     }
 
     @Test
-    void testPdfWithWindowStatusTimeoutParameters() throws Exception {
+    void testPdfWithWindowStatusTimeoutParameters() {
         final String executable = WrapperConfig.findExecutable();
         Pdf pdf = new Pdf(new WrapperConfig(executable));
         pdf.addPageFromUrl("http://www.google.com");
 
+        /*
+         * Due to the absence of the `window.status` assignment on the `www.google.com` page, a timeout exception is certain to occur.
+         * This simulates a scenario where the `window.status` assignment fails.
+         *
+         */
         pdf.addParam(new Param("--window-status", "complete"));
 
-        String exceptionMessage = "";
-        try {
-            /*
-             * Due to the absence of the `window.status` assignment on the `www.google.com` page, a timeout exception is certain to occur.
-             * This simulates a scenario where the `window.status` assignment fails.
-             * */
-            pdf.getPDF();
-        } catch (RuntimeException e) {
-            exceptionMessage = e.getMessage();
-        }
-        assertThat("it should throw a PDF generation timeout exception", exceptionMessage, containsString("PDF generation timeout by user"));
+        final PDFTimeoutException pdfTimeoutException = assertThrows(PDFTimeoutException.class, pdf::getPDF);
+        assertThat("Message should contain timeout seconds", pdfTimeoutException.getMessage(),
+                containsString("timeout after 10 seconds"));
+        assertThat("Message should contain suggestion to increase", pdfTimeoutException.getMessage(),
+                containsString("Try to increase the timeout via the 'PDF.setTimeout()' method."));
+    }
+
+    @Test
+    void testPdfWithShortTimeoutParameters() {
+        final String executable = WrapperConfig.findExecutable();
+        Pdf pdf = new Pdf(new WrapperConfig(executable));
+        pdf.addPageFromUrl("http://www.google.com");
+        pdf.setTimeout(0);
 
+        final PDFTimeoutException pdfTimeoutException = assertThrows(PDFTimeoutException.class, pdf::getPDF);
+        assertThat("Message should contain timeout seconds", pdfTimeoutException.getMessage(),
+                containsString("timeout after 0 seconds"));
+        assertThat("Message should contain suggestion to increase", pdfTimeoutException.getMessage(),
+                containsString("Try to increase the timeout via the 'PDF.setTimeout()' method."));
     }
 }

+ 1 - 1
src/test/java/com/github/jhonnymertz/wkhtmltopdf/wrapper/unit/PdfTests.java

@@ -151,7 +151,7 @@ class PdfTests {
     }
 
     @Test
-    void testMulipleObjects() throws IOException {
+    void testMultipleObjects() throws IOException {
         wc.setAlwaysPutTocFirst(false);
         pdf.addCoverFromFile("cover.html");
         pdf.addPageFromFile("foreword.html");