Browse Source

fix encoding of special chars, add simple test

Patrick Brueckner 10 years ago
parent
commit
e43748db90

+ 14 - 1
pom.xml

@@ -12,6 +12,19 @@
 			<version>1.12.6</version>
 			<version>1.12.6</version>
 		</dependency>
 		</dependency>
 
 
-	</dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>
+
+        <!-- In this change PDFBox is only used by PdfTest to read back the generated PDF,
+             so it is scoped to tests to keep it off the runtime classpath.
+             NOTE(review): confirm no main-source class imports org.apache.pdfbox. -->
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>1.8.9</version>
+            <scope>test</scope>
+        </dependency>
+
+    </dependencies>
 
 
 </project>
 </project>

+ 1 - 1
src/main/java/br/eti/mertz/wkhtmltopdf/wrapper/Pdf.java

@@ -94,7 +94,7 @@ public class Pdf implements PdfService {
         Process process = runtime.exec(command);
         Process process = runtime.exec(command);
         if(htmlFromString) {
         if(htmlFromString) {
             OutputStream stdInStream = process.getOutputStream();
             OutputStream stdInStream = process.getOutputStream();
-            stdInStream.write(htmlInput.getBytes());
+            stdInStream.write(htmlInput.getBytes("UTF-8"));
             stdInStream.close();
             stdInStream.close();
         }
         }
         InputStream stdOutStream = process.getInputStream();
         InputStream stdOutStream = process.getInputStream();

+ 36 - 0
src/tests/java/br/eti/mertz/wkhtmltopdf/wrapper/PdfTest.java

@@ -0,0 +1,36 @@
+package br.eti.mertz.wkhtmltopdf.wrapper;
+
+import org.apache.pdfbox.pdfparser.PDFParser;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.util.PDFTextStripper;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+
+import static org.hamcrest.core.StringContains.containsString;
+
+/**
+ * Checks that HTML handed to {@link Pdf} as a string keeps its non-ASCII characters
+ * in the text of the generated PDF.
+ *
+ * NOTE(review): Pdf launches an external process to do the conversion, so this test
+ * presumably needs that converter installed on the machine running it - confirm.
+ */
+public class PdfTest {
+
+    /**
+     * Renders a small HTML document containing an umlaut and verifies that the
+     * resulting bytes parse as a PDF whose extracted text still contains it.
+     *
+     * @throws Exception if rendering, parsing or text extraction fails
+     */
+    @Test
+    public void testPdfFromStringTo() throws Exception {
+
+        // GIVEN an HTML document, declared as UTF-8, that contains a non-ASCII character
+        Pdf pdf = new Pdf();
+        pdf.addHtmlInput("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>");
+
+        // WHEN the PDF is generated
+        byte[] pdfBytes = pdf.getPDF();
+
+        // THEN the result is a valid PDF (parse() throws an IOException otherwise) ...
+        PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));
+        parser.parse();
+
+        // Close the document even when text extraction or the assertion fails.
+        PDDocument document = new PDDocument(parser.getDocument());
+        try {
+            String pdfText = new PDFTextStripper().getText(document);
+
+            // ... and its text still contains the special character unchanged
+            Assert.assertThat("PDF text should contain the name with the special character", pdfText, containsString("Müller"));
+        } finally {
+            document.close();
+        }
+    }
+}