Browse Source

Provide access to new settings for HyphenationCompoundWordTokenFilter (#115585) (#116968)

Allow the new flags added in Lucene in the HyphenationCompoundWordTokenFilter

Adds access to the two new flags no_sub_matches and no_overlapping_matches.

Lucene issue: https://github.com/apache/lucene/issues/9231

Co-authored-by: Peter Straßer <peter.str@hotmail.de>
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
John Wagster 10 months ago
parent
commit
e241221368

+ 6 - 0
docs/changelog/115585.yaml

@@ -0,0 +1,6 @@
+pr: 115459
+summary: Adds access to flags no_sub_matches and no_overlapping_matches to hyphenation-decompounder-tokenfilter
+area: Search
+type: enhancement
+issues:
+  - 97849

+ 12 - 0
docs/reference/analysis/tokenfilters/hyphenation-decompounder-tokenfilter.asciidoc

@@ -111,6 +111,18 @@ output. Defaults to `5`.
 (Optional, Boolean)
 If `true`, only include the longest matching subword. Defaults to `false`.
 
+`no_sub_matches`::
+(Optional, Boolean)
+If `true`, do not match sub tokens in tokens that are in the word list.
+Defaults to `false`.
+
+`no_overlapping_matches`::
+(Optional, Boolean)
+If `true`, do not allow overlapping tokens.
+Defaults to `false`.
+
+Typically users will only want to enable one of the three flags: `no_overlapping_matches` is the most restrictive, and `no_sub_matches` is more restrictive than `only_longest_match`. When a more restrictive option is enabled, the state of the less restrictive options has no effect.
+
 [[analysis-hyp-decomp-tokenfilter-customize]]
 ==== Customize and add to an analyzer
 

+ 8 - 1
modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java

@@ -28,6 +28,8 @@ import java.nio.file.Path;
  */
 public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundWordTokenFilterFactory {
 
+    private final boolean noSubMatches;
+    private final boolean noOverlappingMatches;
     private final HyphenationTree hyphenationTree;
 
     HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
@@ -46,6 +48,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
         } catch (Exception e) {
             throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
         }
+
+        noSubMatches = settings.getAsBoolean("no_sub_matches", false);
+        noOverlappingMatches = settings.getAsBoolean("no_overlapping_matches", false);
     }
 
     @Override
@@ -57,7 +62,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
             minWordSize,
             minSubwordSize,
             maxSubwordSize,
-            onlyLongestMatch
+            onlyLongestMatch,
+            noSubMatches,
+            noOverlappingMatches
         );
     }
 }

+ 56 - 9
modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CompoundAnalysisTests.java

@@ -31,6 +31,9 @@ import org.elasticsearch.test.IndexSettingsModule;
 import org.hamcrest.MatcherAssert;
 
 import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -42,6 +45,7 @@ import static org.hamcrest.Matchers.hasItems;
 import static org.hamcrest.Matchers.instanceOf;
 
 public class CompoundAnalysisTests extends ESTestCase {
+
     public void testDefaultsCompoundAnalysis() throws Exception {
         Settings settings = getJsonSettings();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
@@ -63,6 +67,44 @@ public class CompoundAnalysisTests extends ESTestCase {
         assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
     }
 
+    /**
+     * Runs the "hyphenationDecompoundingAnalyzerOnlyLongestMatch" analyzer from both the JSON and the YAML
+     * settings fixture and checks that the expected compound parts are among the emitted terms.
+     */
+    public void testHyphenationDecompoundingAnalyzerOnlyLongestMatch() throws Exception {
+        Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
+        for (Settings settings : settingsArr) {
+            List<String> terms = analyze(settings, "hyphenationDecompoundingAnalyzerOnlyLongestMatch", "kaffeemaschine fussballpumpe");
+            MatcherAssert.assertThat(
+                terms,
+                hasItems("kaffeemaschine", "kaffee", "fee", "maschine", "fussballpumpe", "fussball", "ballpumpe", "pumpe")
+            );
+        }
+        // Expects this deprecation warning to have been emitted during the test.
+        assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
+    }
+
+    /**
+     * Given a word list such as ["kaffee", "fee", "maschine"], no_sub_matches should keep "fee" out of "kaffeemaschine".
+     * NOTE(review): hasItems only checks presence, so the absence of "fee" is not actually asserted here.
+     */
+    public void testHyphenationDecompoundingAnalyzerNoSubMatches() throws Exception {
+        Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
+        for (Settings settings : settingsArr) {
+            List<String> terms = analyze(settings, "hyphenationDecompoundingAnalyzerNoSubMatches", "kaffeemaschine fussballpumpe");
+            MatcherAssert.assertThat(terms, hasItems("kaffeemaschine", "kaffee", "maschine", "fussballpumpe", "fussball", "ballpumpe"));
+        }
+        assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
+    }
+
+    /**
+     * Given a word list such as ["fuss", "fussball", "ballpumpe", "ball", "pumpe"], no_overlapping_matches should
+     * keep "ballpumpe" out of "fussballpumpe". NOTE(review): hasItems only checks presence; absence is not asserted.
+     */
+    public void testHyphenationDecompoundingAnalyzerNoOverlappingMatches() throws Exception {
+        Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
+        for (Settings settings : settingsArr) {
+            List<String> terms = analyze(settings, "hyphenationDecompoundingAnalyzerNoOverlappingMatches", "kaffeemaschine fussballpumpe");
+            MatcherAssert.assertThat(terms, hasItems("kaffeemaschine", "kaffee", "maschine", "fussballpumpe", "fussball", "pumpe"));
+        }
+        assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
+    }
+
     private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
         AnalysisModule analysisModule = createAnalysisModule(settings);
@@ -92,20 +134,25 @@ public class CompoundAnalysisTests extends ESTestCase {
     }
 
     private Settings getJsonSettings() throws IOException {
-        String json = "/org/elasticsearch/analysis/common/test1.json";
-        return Settings.builder()
-            .loadFromStream(json, getClass().getResourceAsStream(json), false)
-            .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
-            .build();
+        return getSettings("/org/elasticsearch/analysis/common/test1.json");
     }
 
     private Settings getYamlSettings() throws IOException {
-        String yaml = "/org/elasticsearch/analysis/common/test1.yml";
+        return getSettings("/org/elasticsearch/analysis/common/test1.yml");
+    }
+
+    /**
+     * Loads the analysis settings fixture at {@code filePath} and sets {@code Environment.PATH_HOME_SETTING} to a
+     * fresh temp directory whose {@code config} subdirectory contains a copy of the {@code de_DR.xml} test resource.
+     * Presumably this lets the fixtures' {@code hyphenation_patterns_path} resolve to that copy (TODO confirm).
+     *
+     * @param filePath classpath location of the JSON or YAML settings fixture
+     * @return the loaded settings with the index-created version and the home path set
+     * @throws IOException if the config directory cannot be created or the fixture files cannot be read or copied
+     */
+    private Settings getSettings(String filePath) throws IOException {
+        String hyphenationRulesFileName = "de_DR.xml";
+        Path home = createTempDir();
+        Path config = home.resolve("config");
+        Files.createDirectory(config);
+        // Close the classpath stream once the rules file has been copied; it was previously left open.
+        try (InputStream hyphenationRules = getClass().getResourceAsStream(hyphenationRulesFileName)) {
+            Files.copy(hyphenationRules, config.resolve(hyphenationRulesFileName));
+        }
+
         return Settings.builder()
-            .loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
+            .loadFromStream(filePath, getClass().getResourceAsStream(filePath), false)
             .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
-            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+            .put(Environment.PATH_HOME_SETTING.getKey(), home.toString())
             .build();
     }
 }

+ 1130 - 0
modules/analysis-common/src/test/resources/org/elasticsearch/analysis/common/de_DR.xml

@@ -0,0 +1,1130 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE hyphenation-info SYSTEM "hyphenation.dtd">
+
+<!--
+  Hyphenation patterns for new German orthography.
+
+  Constructed by Carlos Villegas from TeX's dehyphn.tex file
+  obtained from http://www.ctan.org/tex-archive/language/hyphenation
+
+  This file may be used, distributed and modified only according to
+  LaTeX Project Public License:
+
+  ftp://ctan.tug.org/tex-archive/fonts/mathpazo/lppl.txt
+
+  Please report errors in this file to the following address:
+  fop-dev@xml.apache.org [or yours] and not to the address of the
+  original authors. You are not allowed to distribute this file under its
+  original name in the TeX distribution."
+
+  Original comments preserved:
+
+% This is `dehyphn.tex', revision level 28
+%
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% dehyphn   -   Vorlaeufige TeX-Trennmuster fuer die
+%               neue deutsche Rechtschreibung
+%
+%               Preliminary hyphenation patterns for the
+%               new German orthography
+% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%
+% Copyright (C) 1988,1991 Rechenzentrum der Ruhr-Universitaet Bochum
+%               [german hyphen patterns]
+% Copyright (C) 1993,1994,1999 Bernd Raichle/DANTE e.V.
+%               [macros, adaption for TeX 2]
+% Copyright (C) 1998,1999 Walter Schmidt
+%               [adaption to new German orthography]
+%
+% IMPORTANT NOTICE:
+%
+% This program can be redistributed and/or modified under the terms
+% of the LaTeX Project Public License Distributed from CTAN
+% archives in directory macros/latex/base/lppl.txt; either
+% version 1 of the License, or any later version.
+%
+%
+% For use with TeX generated by
+%
+%          Norbert Schwarz
+%          Rechenzentrum Ruhr-Universitaet Bochum
+%          Universitaetsstrasse 150
+%          D-44721 Bochum, Germany
+%
+%
+% Adaption of these patterns for TeX, Version 2.x and 3.x and
+% all fonts in T1/`Cork'/EC/DC and/or OT1/CM encoding
+% (ghyph31.tex/dehypht.tex) by
+%
+%          Bernd Raichle
+%          Stettener Str. 73
+%          D-73732 Esslingen, Germany
+%   Email: raichle@Informatik.Uni-Stuttgart.DE
+%
+%
+% Adapted to new German orthography (gnhyph01.tex/dehyphn.tex) by
+%
+%          Walter Schmidt
+%          Schornbaumstr. 2
+%          D-91052 Erlangen, Germany
+%   Email: wschmi@ibm.net
+%
+% ***   Please, send error reports to the latter address only!   ***
+% ***                                                            ***
+% ***   Please, do _not_ report errors occuring in conjunction   ***
+% ***   with the traditional german hyphenation patterns         ***
+% ***   dehypht.tex or ghyph31.tex, too.                         ***
+%
+%
+% Additional documentation can be found near the end of this file.
+%
+%
+\message{German New Hyphenation Patterns `dehyphn' Rev.28 <1999-03-08> (WaS)}
+\message{(Formerly known under the name `gnhyph01'.)}
+%
+% Next we define some commands which are used inside the patterns.
+% To keep them local, we enclose the rest of the file in a group
+% (The \patterns command globally changes the hyphenation trie!).
+%
+\begingroup
+%
+%
+% Make sure that doublequote is not active:
+\catcode`\"=12
+%
+%
+% Because ^^e4 is used in the following macros which is read by
+% TeX 2.x as ^^e or %, the comment character of TeX, some trick
+% has to be found to avoid this problem.  The same is true for the
+% character ^^f or & in the TeX 2.x code.
+% Therefore in the code the exclamationmark ! is used instead of
+% the circumflex ^ and its \catcode is set appropriately
+% (normally \catcode`\!=12, in the code \catcode`\!=7).
+%
+% The following \catcode, \lccode assignments and macro definitions
+% are defined in such a way that the following \pattern{...} list
+% can be used for both, TeX 2.x and TeX 3.x.
+%
+% We first change the \lccode of ^^Y to make sure that we can
+% include this character in the hyphenation patterns.
+%
+\catcode`\^^Y=11 \lccode`\^^Y=`\^^Y
+%
+% Then we have to define some macros depending on the TeX version.
+% Therefore we have to distinguish TeX version 2.x and 3.x:
+%
+\ifnum`\@=`\^^40 % true => TeX 3.x
+  %
+  % For TeX 3:
+  %
+  % Assign appropriate \catcode and \lccode values for all
+  % accented characters used in the patterns (\uccode changes are
+  % not used within \patterns{...} and thus not necessary):
+  %
+  \catcode"E4=11 \catcode"C4=11 % \"a \"A
+  \catcode"F6=11 \catcode"D6=11 % \"o \"O
+  \catcode"FC=11 \catcode"DC=11 % \"u \"U
+  \catcode"FF=11 \catcode"DF=11 % \ss  SS
+  %
+  \lccode"C4="E4 \uccode"C4="C4  \lccode"E4="E4 \uccode"E4="C4
+  \lccode"D6="F6 \uccode"D6="D6  \lccode"F6="F6 \uccode"F6="D6
+  \lccode"DC="FC \uccode"DC="DC  \lccode"FC="FC \uccode"FC="DC
+  \lccode"DF="FF \uccode"DF="DF  \lccode"FF="FF \uccode"FF="DF
+  %
+  % In the following definitions we use ??xy instead of ^^xy
+  % to avoid errors when reading the following macro definitions
+  % with TeX 2.x (remember ^^e(4) is the comment character):
+  %
+  \catcode`\?=7
+  %
+  % Define the accent macro " in such a way that it
+  % expands to single letters in font encoding T1.
+  \catcode`\"=13
+  \def"#1{\ifx#1a??e4\else \ifx#1o??f6\else \ifx#1u??fc\else
+      \errmessage{Hyphenation pattern file corrupted!}%
+    \fi\fi\fi}
+  %
+  %   - patterns with umlauts are ok
+  \def\n#1{#1}
+  %
+  % For \ss which exists in T1 _and_ OT1 encoded fonts but with
+  % different glyph codes, duplicated patterns for both encodings
+  % are included.  Thus you can use these hyphenation patterns for
+  % T1 and OT1 encoded fonts:
+  %   - define \3 to be code `\^^ff (\ss in font encoding T1)
+  %   - define \9 to be code `\^^Y  (\ss in font encoding OT1)
+  \def\3{??ff}
+  \def\9{??Y}
+  %   - duplicated patterns to support font encoding OT1 are ok
+  \def\c#1{#1}
+  %   >>>>>>  UNCOMMENT the next line, if you do not want
+  %   >>>>>>  to use fonts in font encoding OT1
+  %\def\c#1{}
+  %
+  \catcode`\?=12
+  %
+\else
+  %
+  % For TeX 2:
+  %
+  % Define the accent macro " to throw an error message.
+  \catcode`\"=13
+  \def"#1{\errmessage{Hyphenation pattern file corrupted!}}
+  %
+  %   - ignore all patterns with umlauts
+  \def\n#1{}
+  %
+  % With TeX 2 fonts in encoding T1 can be used, but all glyphs
+  % in positions > 127 can not be used in hyphenation patterns.
+  % Thus only patterns with glyphs in OT1 positions are included:
+  %   - define \3 to be code ^^Y (\ss in CM font encoding)
+  %   - define \9 to throw an error message
+  \def\3{^^Y}
+  \def\9{\errmessage{Hyphenation pattern file corrupted!}}
+  %   - ignore all duplicated patterns with \ss in T1 encoding
+  \def\c#1{}
+  %
+\fi
+%
+-->
+<hyphenation-info>
+
+    <hyphen-char value="-"/>
+    <hyphen-min before="2" after="2"/>
+
+    <classes>
+        <!-- @!$%^&*()_-+=~`{[}]:;'|<,.>?/0123456789 -->
+        aA
+        bB
+        cC
+        dD
+        eE
+        fF
+        gG
+        hH
+        iI
+        jJ
+        kK
+        lL
+        mM
+        nN
+        oO
+        pP
+        qQ
+        rR
+        sS
+        tT
+        uU
+        vV
+        wW
+        xX
+        yY
+        zZ
+        ��
+        ��
+        ��
+        ��
+        �
+    </classes>
+
+    <patterns>
+        .aa6l .ab3a4s .ab3ei .abi2 .ab3it .ab1l .ab1r .ab3u .ad3o4r .alti6
+        .ana3c .an5alg .an1e
+        .ang8s2t1 <!-- % .ang8s (des einzige Wort, das mit angs... beginnt !) -->
+        .an1s .ap1p .ar6sc .ar6ta .ar6tei .as2z
+        .au2f1 .au2s3 .be5erb .be3na .ber6t5r .bie6r5 .bim6s5t .brot3 .bru6s
+        .ch6 .che6f5 .da8c .da2r .dar5in .dar5u .den6ka .de5r6en .des6pe
+        .de8spo .de3sz .dia3s4 .dien4 .dy2s1 .ehren5 .eine6 .ei6n5eh .ei8nen
+        .ein5sa .en6der .en6d5r .en3k4 .en8ta8 .en8tei .en4t3r .epo1 .er6ban
+        .er6b5ei .er6bla .er6d5um .er3ei .er5er .er3in .er3o4b .erwi5s .es1p
+        .es8t1l  .es8t1n <!-- % .es8t -->
+        .ex1a2 .ex3em .fal6sc .fe6st5a .flu4g3 .furch8 .ga6ner .ge3n4a
+        .ge5r�
+        .ges6
+        .halb5 .halbe6 .hal6br .haup4 .hau4t .heima6 .he4r3e
+        .her6za .he5x .hin3 .hir8sc .ho4c .hu3sa .hy5o .ibe5 .ima6ge .in1
+        .ini6 .is5chi .jagd5 .kal6k5o .ka6ph .ki4e .kop6f3 .kraf6 .k�5ra
+        .lab6br .liie6 .lo6s5k .l�4s3t .ma5d .mi2t1 .no6th .no6top
+        .obe8ri .ob1l .obs2 .ob6st5e .or3c .ort6s5e .ost3a .oste8r .pe4re
+        .pe3ts .ph6 .po8str .rau4m3 .re5an .ro8q .ru5the .r�5be
+        <!-- % .r�8stet  -->
+        .sch8 .se6e .se5n6h .se5ra .si2e .spi6ke .st4 .sy2n
+        .tages5 .tan6kl .ta8th .te6e .te8str .to6der .to8nin .to6we .um1
+        .umpf4 .un1 .une6 .unge5n .ur1c .ur5en .ve6rin .vora8 .wah6l5 .we8ges
+        .we8s2t .wes3te
+        .wo6r .wor3a .wun4s .zi4e .zuch8 .�nde8re .�ch8 aa1c aa2gr
+        aal5e aa6r5a a5arti aa2s1t aat2s 6aba ab3art 1abdr 6abel aben6dr
+        ab5erk ab5err ab5esse 1abf 1abg 1abh� ab1ir 1abko a1bl ab1la
+        5ablag a6bla� ab4ler ab1lu a8bl� 5a6bl� abma5c
+        1abn ab1ra ab1re 5a6brec ab1ro
+        ab1s
+        ab8sk abs2z 3abtei ab1ur 1abw
+        5abze 5abzu ab1�n ab�u8 a4ce.  a5chal ach5art ach5au a1che
+        a8chent ach6er.  a6ch5erf a1chi ach1l ach3m ach5n a1cho ach3re a1chu
+        ach1w a1chy ach5�f ack1o acks6t ack5sta a1d 8ad.  a6d5ac ad3ant
+        ad8ar 5addi a8dein ade5o8 adi5en 1adj 1adle ad1op a2dre 3adres adt1
+        1adv a6d� a1e2d ae1r a1er.  1aero 8afa a3fal af1an a5far a5fat
+        af1au a6fentl a2f1ex af1fr af5rau af1re 1afri af6tent af6tra aft5re
+        a6f5um 8af� ag5abe 5a4gent ag8er ages5e 1aggr ag5las ag1lo a1gn
+        ag2ne 1agog a6g5und a1ha a1he ah5ein a4h3erh a1hi ahl1a ah1le ah4m3ar
+        ahn1a a5ho ahra6 ahr5ab ah1re ah8rei ahren8s ahre4s3 ahr8ti ah1ru a1hu
+        ah8� ai3d2s ai1e aif6 a3inse ai4re.  a5isch.  ais8e a3ismu ais6n
+        aiso6 a1j 1akad a4kade a1ke a1ki 1akko 5akro1 a5lal al5ans 3al8arm
+        al8beb al8berw alb5la 3album al1c a1le a6l5e6be a4l3ein a8lel a8lerb
+        a8lerh a6lert 5a6l5eth 1algi al4gli al3int al4lab al8lan al4l3ar
+        alle3g a1lo a4l5ob al6schm al4the
+        <!-- % altist5  -->
+        al4t3re 8a1lu alu5i a6lur
+        alu3ta a1l� a6mate 8ame.  5a6meise am6m5ei am6mum am2n ampf3a
+        am6schw am2ta a1mu a1m� a3nac a1nad anadi5e an3ako an3alp 3analy
+        an3ame an3ara a1nas an5asti a1nat anat5s an8dent ande4s3 an1ec an5eis
+        an1e2k 4aner.  a6n5erd a8nerf a6n5erke 1anfa 5anfert 1anf� 3angab
+        5angebo an3gli ang6lis an2gn 3angri ang5t6 5anh� ani5g ani4ka
+        an5i8on an1kl an6kno an4kro 1anl anma5c anmar4 3annah anne4s3 a1no
+        5a6n1o2d 5a6n3oma 5a6nord 1anr an1sa 5anschl an4soz an1st 5anstal
+        an1s2z 5antenn an1th 5anw� a5ny an4z3ed 5anzeig 5anzieh 3anzug
+        an1� 5an�s a1n� an�8d a1os a1pa 3apfel a2ph1t
+        aph5�6 a1pi 8apl apo1c apo1s
+        a6pos2t <!-- % a6pos2te  -->
+        a6poth 1appa ap1pr a1pr
+        a5p� a3p� a1ra a4r3af ar3all 3arbei 2arbt ar1c 2a1re ar3ein
+        ar2gl 2a1ri ari5es ar8kers ar6les ar4nan ar5o6ch ar1o2d a1rol ar3ony
+        a8ror a3ros ar5ox ar6schl 8artei ar6t5ri a1ru a1ry 1arzt arz1w
+        ar8z� ar�8m ar�6 ar5�m ar1�2 a1sa a6schec
+        asch5l asch3m a6schn a3s4hi as1pa asp5l
+        <!-- % a8steb % ueberfluessig ? -->
+        as5tev 1asth
+        <!-- % a6stoc % ueberfluessig ? -->
+        a1str ast3re 8a1ta ata5c ata3la a6tapf ata5pl a1te a6teli aten5a
+        ate5ran 6atf 6atg a1th at3hal 1athl 2a1ti 5atlant 3atlas 8atmus 6atn
+        a1to a6t5ops ato6ra a6t5ort.  4a1tr a6t5ru at2t1h at5t6h� 6a1tu
+        atz1w a1t� a1t� au1a au6bre auch3a au1e aue4l 5aufent
+        3auff� 3aufga 1aufn auf1t 3auftr 1aufw 3auge.  au4kle aule8s 6aum
+        au8mar aum5p 1ausb 3ausd 1ausf 1ausg au8sin
+        <!-- % 3auss  -->
+        au4sta 1ausw 1ausz
+        aut5eng au1th 1auto au�e8 a1v ave5r6a aver6i a1w a6wes a1x
+        a2xia a6xio a1ya a1z azi5er.  8a� 1ba 8ba8del ba1la ba1na
+        ban6k5r ba5ot bardi6n ba1ro basten6 bau3sp 2b1b bb6le b2bli 2b1c 2b1d
+        1be be1a be8at.  be1ch 8becht 8becke.  be5el be1en bee8rei be5eta bef2
+        8beff be1g2 beh�8 bei1s 6b5eisen bei3tr b8el bel8o belu3t be3nac
+        bend6o be6ners be6nerw be4nor ben4se6 bens5el be1n� be1n�
+        be1o2 b8er.  be1ra be8rac ber8gab.  ber1r be1r� bes8c bes5erh
+        bes2p be5tha bet5sc be1un be1ur 8bex be6zwec 2b1f8
+        <!-- % bfe6st5e  -->
+        2b1g2
+        bga2s5 bge1 2b1h bhole6 1bi bi1bl b6ie bi1el bi1la bil�5 bi1na
+        bi4nok
+        <!-- % bi5str  -->
+        bi6stu bi5tr bit4t5r b1j 2b1k2 bk�6 bl8 b6la.
+        6b1lad 6blag 8blam 1blat b8latt 3blau.  b6lav 3ble.  b1leb b1led
+        8b1leg 8b1leh 8bleid 8bleih 6b3lein
+        <!-- % blei3s  -->
+        ble4m3o 4blich b4lind
+        8bling b2lio 5blit b4litz b1loh 8b1los 1blu 5blum 2blun blut3a blut5sc
+        3bl� bl�s5c 5bl� 3bl� bl�8sc 2b1m 2b1n 1bo
+        bo1ch bo5d6s boe5 8boff 8bonk bo1ra b1ort 2b1p2 b1q 1br brail6 brast8
+        bre4a b5red 8bref 8b5riem b6riga bro1s b1rup b2ruz 8br�h
+        br�s5c 8bs b1sa b8sang b2s1ar b1sc bs3erl bs3erz b8sof b1s2p
+        bst1h b3stru b5st� b6sun 2b1t b2t1h 1bu bu1ie bul6k b8ure bu6sin
+        6b1v 2b1w 1by1 by6te.  8b1z
+        <!-- % bzi1s % ueberfluessig ? -->
+        1b� b5�6s5 1b�
+        b6�5bere b�ge6 b�gel5e b�r6sc 1ca cag6 ca5la ca6re
+        ca5y c1c 1ce celi4c celich5 ce1ro c8h 2ch.  1chae ch1ah ch3akt cha6mer
+        8chanz 5chara 3chari 5chato 6chb 1chef 6chei ch3eil ch3eis 6cherkl
+        6chf 4chh 5chiad 5chias 6chins 8chj chl6 5chlor 6ch2m 2chn6 ch8nie
+        5cho.  8chob choi8d 6chp ch3ren ch6res ch3r� 2chs 2cht cht5ha
+        cht3hi 5chthon ch6tin 6chuh chu4la 6ch3unt chut6t 8chw 1ci ci5tr c2k
+        2ck.  ck1ei 4ckh ck3l ck3n ck5o8f ck1r 2cks ck5stra ck6s5u c2l 1c8o
+        con6ne 8corb cos6t c3q 1c6r 8c1t 1cu 1cy 5c�1 c�5 1da.
+        8daas 2dabg 8dabr 6dabt 6dabw 1dac da2gr 6d5alk 8d5amt dan6ce.
+        dani5er dan8ker 2danl danla6 6dans 8danzi 6danzu d1ap da2r1a8 2d1arb
+        d3arc dar6men 4d3art 8darz 1dat 8datm 2d1auf 2d1aus 2d1b 2d1c 2d1d
+        d5de d3d2h dd�mme8 1de 2deal de5an de3cha de1e defe6 6deff 2d1ehr
+        5d4eic de5isc de8lar del6s5e del6spr de4mag de8mun de8nep dene6r
+        8denge.  8dengen de5o6d 2deol de5ram 8derdb der5ein de1ro der1r d8ers
+        der5um de4s3am de4s3an de4sau de6sil de4sin de8sor de4spr de2su 8deul
+        de5us.  2d1f df2l 2d1g 2d1h 1di dia5c di5ara dice5 di3chr di5ena di1gn
+        di1la dil8s di1na 8dind 6dinf 4d3inh 2d1ins di5o6d di3p4t di8sen dis1p
+        di5s8per di6s5to
+        dis3tr <!-- % dis5tra  -->
+        di8tan di8tin d1j 6dje 2dju 2d1k 2d1l 2d1m
+        2d1n6 dni6 dnje6 1do 6d5obe do6berf 6d5ony do3ran 6dord 2d1org dor4t3h
+        <!-- % do6ste  -->
+        6doth dott8e 2d1p d5q dr4 1drah 8drak d5rand 6dre.  4drech
+        d6reck 4d3reg 8d3reic d5reife 8drem 8d1ren 2drer 8dres.  6d5rh 1dria
+        d1ric 8drind droi6 dro5x 1dru 8drut dr�s5c 1dr� dr�5b
+        dr�8sc 2ds d1sa d6san dsat6 d1sc 5d6scha.  5dschik dse8e d8serg
+        8dsl d1sp d4spak ds2po d8sp� d1st d1s� 2dt d1ta d1te d1ti
+        d1to dt1s6 d1tu d5t� 1du du5als du1b6 du1e duf4t3r 4d3uh du5ie
+        8duml 8dumw 2d1und du8ni 6d5unt dur2c durch3 6durl 6dursa 8durt
+        dus1t <!-- % du1s -->
+        du8schr 2d1v 2d1w dwa8l 2d1z 1d� 6d�h 8d�nd d�6r
+        d�8bl d5�l d�r6fl d�8sc d5�4st
+        <!-- % d�s3te -->
+        1d� ea4ben e1ac e1ah e1akt e1al.  e5alf e1alg e5a8lin e1alk e1all
+        e5alp e1alt e5alw e1am e1and ea6nim e1ar.  e5arf e1ark e5arm e3art
+        e5at.  e6ate e6a5t6l e8ats e5att e6au.  e1aus e1b e6b5am ebens5e
+        eb4lie eb4ser eb4s3in e1che e8cherz e1chi ech3m 8ech3n ech1r ech8send
+        ech4su e1chu eck5an e5cl e1d ee5a ee3e ee5g e1ei ee5isc eei4s3t
+        ee6lend e1ell ee5l� e1erd ee3r4e ee8reng eere6s5 ee5r�
+        ee6tat e1ex e1f e6fau e8fe8b 3effek ef3rom ege6ra eglo6si 1egy e1ha
+        e6h5ach eh5ans e6hap eh5auf e1he e1hi ehl3a eh1le ehl5ein eh1mu ehn5ec
+        e1ho ehr1a eh1re ehre6n eh1ri eh1ru ehr5um e1hu eh1w e1hy e1h�
+        e1h� e3h�t ei1a eia6s ei6bar eich3a eich5r ei4dar ei6d5ei
+        ei8derf ei3d4sc ei1e 8eifen 3eifri 1eign eil1d ei6mab ei8mag ein1a4
+        ei8nat ei8nerh ei8ness ei6nete ein1g e8ini ein1k ei6n5od ei8nok ei4nor
+        e3ins� ei1o e1irr ei5ru ei8sab ei5schn ei6s5ent ei8sol ei4t3al
+        eit3ar eit1h ei6thi ei8tho eit8samt ei6t5um e1j 1ekd e1ke e1ki e1k2l
+        e1kn ekni4 e1la e2l1al 6elan e6lanf e8lanl e6l5ans el3arb el3arm
+        e6l3art 5e6lasti e6lauge elbst5a e1le 6elef ele6h e6l5ehe e8leif
+        e6l5einh 1elek e8lel 3eleme e6lemen e6lente el5epi e4l3err e6l5ersc
+        elf2l elg2 e6l5ins ell8er 4e1lo e4l3ofe el8soh el8tent 5eltern e1lu
+        elut2 e1l� e1l� em8dei em8meis 4emo emo5s 1emp1f 1empt 1emto
+        e1mu emurk4 emurks5 e1m� en5a6ben en5achs en5ack e1nad en5af
+        en5all en3alt en1am en3an.  en3ant en3anz en1a6p en1ar en1a6s 6e1nat
+        en3auf en3aus en2ce enda6l end5erf end5erg en8dess 4ene.  en5eck
+        e8neff e6n5ehr e6n5eim en3eis 6enem.  6enen e4nent 4ener.  e8nerd
+        e6n3erf e4nerg 5energi e6n5erla en5ers e6nerst en5erw 6enes e6n5ess
+        e2nex en3glo 2eni enni6s5 ennos4 enns8 e1no e6nober eno8f en5opf
+        e4n3ord en8sers ens8kl en1sp ens6por en5t6ag enta5go en8terbu en6tid
+        3entla ent5ric 5entwic 5entwu 1entz enu5i e3ny en8zan en1�f
+        e1n�s e1n�g eo1c e5o6fe e5okk e1on.  e3onf e5onk e5onl e5onr
+        e5opf e5ops e5or.  e1ord e1org eo5r6h eo1t e1pa e8pee e6p5e6g ep5ent
+        e1p2f e1pi 5epid e6pidem e1pl 5epos e6pos.  ep4p3a e1pr e1p� e1q
+        e1ra.  er5aal 8eraba e5rabel er5a6ben e5rabi er3abs er3ach era5e
+        era5k6l er3all er3amt e3rand e3rane er3ans e5ranz.  e1rap er3arc
+        e3rari er3a6si e1rat erat3s er3auf e3raum 3erbse er1c e1re 4e5re.
+        er3eck er5egg er5e2h 2erei e3rei.  e8reine er5einr 6eren.  e4r3enm
+        4erer.  e6r5erm er5ero er5erst e4r3erz er3ess 5erf�l er8gan.
+        5ergebn er2g5h 5erg�nz 5erh�hu 2e1ri eri5ak e6r5iat e4r3ind
+        e6r5i6n5i6 er5ins e6r5int er5itio er1kl 3erkl� 5erl�s.
+        ermen6s er6nab 3ernst 6e1ro.  e1rod er1o2f e1rog 6e3roi ero8ide e3rol
+        e1rom e1ron e3rop8 e2r1or e1ros e1rot er5ox ersch4 5erstat er6t5ein
+        er2t1h er5t6her 2e1ru eruf4s3 e4r3uhr er3ums e5rus 5erwerb e1ry er5zwa
+        er3zwu er�8m er5�s er�8 e3r�s. e6r1�2b e1sa
+        esa8b e8sap e6s5a6v e1sc esch4l ese1a es5ebe eserve5 e8sh es5ill
+        es3int es4kop e2sl eso8b e1sp espei6s5 es2po es2pu 5essenz e6stabs
+        e6staf e6st5ak est3ar e8stob e1str est5res es3ur e2sz e1s� e1ta
+        et8ag etari5e eta8ta e1te eten6te et5hal e5thel e1ti 1etn e1to e1tr
+        et3rec e8tscha et8se et6tei et2th et2t1r e1tu etu1s et8zent et8zw
+        e1t� e1t� e1t� eu1a2 eu1e eue8rei eu5fe euin5 euk2
+        e1um.  eu6nio e5unter eu1o6 eu5p 3europ eu1sp eu5str eu8zo e1v eval6s
+        eve5r6en ever4i e1w e2wig ex1or 1exp 1extr ey3er.  e1z e1�2
+        e5�8 e1� e8�es fa6ch5i fade8 fa6del fa5el.
+        fal6lo falt8e fa1na fan4gr 6fanl 6fap far6ba far4bl far6r5a 2f1art
+        fa1sc fau8str fa3y 2f1b2 6f1c 2f1d 1fe 2f1eck fe6dr feh6lei f6eim
+        8feins f5eis fel5en 8feltern 8femp fe5rant 4ferd.  ferri8 fe8stof
+        fe6str fe6stum fe8tag fet6ta fex1 2ff f1fa f6f5arm f5fe ffe5in ffe6la
+        ffe8ler ff1f f1fla ff3lei ff4lie ff8sa ff6s5ta 2f1g2 fgewen6 4f1h 1fi
+        fid4 fi3ds fieb4 fi1la fi8lei fil4m5a f8in.  fi1na 8finf fi8scho fi6u
+        6f1j 2f1k2 f8lanz fl8e 4f3lein 8flib 4fling f2lix 6f3lon 5flop 1flor
+        5f8l�c 3fl�t 2f1m 2f1n 1fo foh1 f2on fo6na 2f1op fo5ra
+        for8mei for8str for8th for6t5r fo5ru 6f5otte 2f1p8 f1q fr6 f5ram
+        1f8ran f8ra� f8re.  frei1 5frei.  f3reic f3rest f1rib
+        8f1ric 6frig 1fris fro8na fr�s5t 2fs f1sc f2s1er f5str
+        fs3t�t 2ft f1tak f1te ft5e6h ftere6 ft1h f1ti f5to f1tr ft5rad
+        ft1sc ft2so f1tu ftwi3d4 ft1z 1fu 6f5ums 6funf fun4ka fu8�end
+        6f1v 2f1w 2f1z 1f� f�1c 8f�rm 6f�ug
+        f�8� f�de3 8f�f 3f�r 1f�
+        f�n4f3u 1ga ga6bl 6gabw 8gabz g3a4der ga8ho ga5isc 4gak ga1la
+        6g5amt ga1na gan5erb gan6g5a ga5nj 6ganl 8gansc 6garb 2g1arc 2g1arm
+        ga5ro 6g3arti ga8sa ga8sc ga6stre 2g1atm 6g5auf gau5fr g5aus 2g1b g5c
+        6gd g1da 1ge ge1a2 ge6an ge8at.  ge1e2 ge6es gef2 8geff ge1g2l ge1im
+        4g3eise geist5r gel8bra gelt8s ge5l� ge8nin gen3k 6g5entf
+        ge3n� ge1or ge1ra ge6rab ger8au 8gerh� ger8ins ge1ro 6g5erz.
+        ge1r� ge1r� ge1s ges2p
+        ge2s7te. ge2s7ten ge2s7ter ge2s7tik
+        ge5unt 4g3ex3 2g1f8 2g1g g1ha 6g1hei
+        5ghel.  g5henn 6g1hi g1ho 1ghr g1h� 1gi gi5la gi8me.  gi1na
+        4g3ins
+        gis1tr <!-- % gi3str % lat. Trennregeln ? -->
+        g1j 2g1k 8gl.  1glad g5lag glan4z3 1glas 6glass 5glaub
+        g3lauf 1gle.  g5leb 3gleic g3lein 5gleis 1glem 2gler 8g3leu gli8a
+        g2lie 3glied 1g2lik 1g2lim g6lio 1gloa 5glom 1glon 1glop g1los g4loss
+        g5luf 1g2ly 1gl� 2g1m gn8 6gn.  1gna 8gnach 2gnah g1nas g8neu
+        g2nie g3nis 1gno 8gnot 1go goe1 8gof 2gog 5gogr 6g5oh goni5e 6gonist
+        go1ra 8gord 2g1p2 g1q 1gr4 g5rahm gra8m gra4s3t 6g1rec gre6ge 4g3reic
+        g5reit 8grenn gri4e g5riem 5grif 2grig g5ring 6groh 2grot gro6�
+        4grut 2gs gs1ab g5sah gs1ak gs1an gs8and gs1ar gs1au g1sc
+        gs1ef g5seil gs5ein g2s1er gs1in g2s1o gso2r gs1pr g2s1u 2g1t g3te
+        g2t1h 1gu gu5as gu2e 2gue.  6gued 4g3uh 8gums 6g5unt
+        <!-- % gu1s  -->
+        gut3h gu2tu
+        4g1v 2g1w gy1n g1z 1g� 8g�8m 6g�rm 1g� 1g�
+        6g�b 1haa hab8r ha8del hade4n 8hae ha5el.  haf6tr 2hal.  ha1la
+        hal4b5a 6hale 8han.  ha1na han6dr han6ge.  2hani h5anth 6hanz 6harb
+        h3arbe h3arme ha5ro ha2t1h h1atm hau6san ha8� h1b2 h1c h1d
+        he2bl he3cho h3echt he5d6s 5heft h5e6he.  hei8ds h1eif 2hein he3ism
+        he5ist.  heit8s3 hek6ta hel8lau 8helt he6mer 1hemm 6h1emp hen5end
+        hen5klo hen6tri he2nu 8heo he8q her3ab he5rak her3an 4herap her3au
+        h3erbi he1ro he8ro8b he4r3um her6z5er he4spe he1st heta6 het5am he5th
+        heu3sc he1xa hey5e h1f2 h1g hgol8 h1h h1iat hie6r5i hi5kt hil1a2
+        hil4fr hi5nak hin4ta hi2nu hi5ob hirn5e hir6ner hi1sp hi1th hi5tr
+        5hitz h1j h6jo h1k2 hlabb4 hla4ga hla6gr h5lai hl8am h1las h1la�
+        hl1c h1led h3lein h5ler.  h2lif h2lim h8linf hl5int h2lip
+        h2lit h4lor h3lose h1l�s hme5e h2nee h2nei hn3eig h2nel hne8n
+        hne4p3f hn8erz h6netz h2nip h2nit h1nol hn5sp h2nuc h2nud h2nul hoch1
+        1hoh hoh8lei 2hoi ho4l3ar 1holz h2on ho1ra 6horg 5horn.  ho3sl hos1p
+        ho4spi h1p hpi6 h1q 6hr h1rai h8rank h5raum hr1c hrcre8 h1red h3reg
+        h8rei.  h4r3erb h8rert hrg2 h1ric hr5ins h2rom hr6t5erl hr2t1h hr6t5ra
+        hr8tri h6rum hr1z hs3ach h6s5amt h1sc h6s5ec h6s5erl hs8erle h4sob
+        h1sp h8spa� h8spel hs6po h4spun h1str h4s3tum hs3und
+        h1s� h5ta.  h5tab ht3ac ht1ak ht3ang h5tanz ht1ar ht1at h5taub
+        h1te h2t1ec ht3eff ht3ehe h4t3eif h8teim h4t3ein ht3eis h6temp h8tentf
+        hte8ren h6terf� h8tergr h4t3erh h6t5ersc h8terst h8tese h8tess
+        h2t1eu h4t3ex ht1he ht5hu h1ti ht5rak hts3ah ht1sc ht6sex ht8sk ht8so
+        h1tu htz8 h5t�m hub5l hu6b5r huh1l h5uhr.  huld5a6 hu8lent
+        hu8l� h5up.  h1v h5weib h3weis h1z h�8kl h�l8s
+        h�ma8tu8 h�8sche. h�t1s h�u4s3c 2h�.
+        2h�e 8h�i h�6s h�s5c h�hne6 h�l4s3t
+        h�tte8re i5adn i1af i5ak.  i1al.  i1al1a i1alb i1ald i5alei i1alf
+        i1alg i3alh i1alk i1all i1alp i1alr i1als i1alt i1alv i5alw i3alz
+        i1an.  ia5na i3and ian8e ia8ne8b i1ang i3ank i5ann i1ant i1anz i6apo
+        i1ar.  ia6rab i5arr i1as.  i1asm i1ass i5ast.  i1at.  i5ats i1au i5azz
+        i6b5eig i6b5eis ib2le i4blis i6brig i6b5unt i6b�b i1che ich5ei
+        i6cherb i1chi ich5ins ich1l ich3m ich1n i1cho icht5an icht3r i1chu
+        ich1w ick6s5te ic5l i1d id3arm 3ideal ide8na 3ideol ide5r� i6diot
+        id5rec id1t ie1a ie6b5ar iebe4s3 ie2bl ieb1r ie8bra ie4bre ie8b�
+        ie2dr ie1e8 ie6f5ad ief5f ie2f1l ie4fro ief1t i1ei ie4l3ec ie8lei
+        ie4lek i3ell i1en.  i1end ien6e i3enf i5enn ien6ne.  i1enp i1enr
+        i5ensa ien8stal i5env i1enz ie5o ier3a4b ie4rap i2ere ie4rec ie6r5ein
+        ie6r5eis ier8er i3ern.  ie8rum ie8rund ie6s5che ie6tau ie8tert ie5the
+        ie6t5ri i1ett ie5un iex5 2if i1fa if5ang i6fau if1fr if5lac i5f6lie
+        i1fre ift5a if6t5r ig3art 2ige i8gess ig5he i5gla ig2ni i5go ig3rot
+        ig3s2p i1ha i8ham i8hans i1he i1hi ih1n ih1r i1hu i8hum ih1w 8i1i ii2s
+        ii2t i1j i1k i6kak i8kerz i6kes ik4ler i6k5unt 2il i5lac i1lag il3ans
+        i5las i1lau il6auf i1le ile8h i8lel il2fl il3ipp il6l5enn i1lo ilt8e
+        i1lu i1l� i8mart imb2 i8mele i8mid imme6l5a i1mu i1m�
+        i5m� ina5he i1nat in1au inau8s 8ind.  in4d3an 5index ind2r 3indus
+        i5nec i2n1ei i8nerw 3infek 1info 5ingeni ing5s6o 5inhab ini5er.  5inj
+        in8k�t in8nan i1no inoi8d in3o4ku in5sau in1sp 5inspe 5instit
+        5instru ins4ze 5intere 5interv in3the in5t2r i5ny in�2 i1n�r
+        in1�s in�8 in5�d i1n�s 2io io1a8 io1c iode4 io2di
+        ioi8 i1ol.  i1om.  i1on.  i5onb ion2s1 i1ont i5ops i5o8pt i1or.
+        i3oral io3rat i5orc i1os.  i1ot.  i1o8x 2ip i1pa i1pi i1p2l i1pr i1q
+        i1ra ir6bl i1re i1ri ir8me8d ir2m1o2 ir8nak i1ro ir5rho ir6schl
+        ir6sch5r i5rus i5ry i5r� i1sa i8samt i6sar i2s1au i8scheh i8schei
+        isch5m isch3r isch�8 is8ele ise3ra i4s3erh is3err isi6de i8sind
+        is4kop ison5e is6por i8s5tum i5sty i5s� i1ta it5ab.  i2t1a2m
+        i8tax i1te i8tersc i1thi i1tho i5thr it8h� i1ti i8ti8d iti6kl
+        itmen4 i1to i8tof it3ran it3rau i1tri itri5o it1sc it2se it5spa it8tru
+        i1tu it6z5erg it6z1w i1t� it�6r5e it�t2 it�ts5
+        i1t� i1u iu6r 2i1v i6vad iva8tin i8vei i6v5ene i8verh i2vob i8vur
+        i1w iwi2 i5xa i1xe i1z ize8n i8zir i6z5w i�8m i1�6r
+        i5�t. i5�v i1�8 i�8 i6�5ers ja5la
+        je2t3r 6jm 5jo jo5as jo1ra jou6l ju5cha jugen4 jugend5 jung5s6
+        <!-- % ju1s -->
+        3j� 1ka 8kachs 8kakz ka1la kal5d kam5t ka1na 2kanl 8kapf ka6pl
+        ka5r6a 6k3arbe ka1ro kar6p5f 4k3arti 8karz ka1r� kasi5e ka6teb
+        kat8ta kauf6s kau3t2 2k1b 2k1c 4k1d kehr6s kehrs5a 8keic 2k1eig 6k5ein
+        6k5eis ke6lar ke8leis ke8lo 8kemp k5ente.  k3entf 8k5ents 6kentz ke1ra
+        k5erlau 2k1f8 2k1g 2k1h ki5fl 8kik king6s5 6kinh ki5os ki5sp ki5th
+        8ki8� 2k1k2 kl8 1kla 8klac k5lager kle4br k3leib 3kleid kle5isc
+        4k3leit k3lek 6k5ler.  5klet 2klic 8klig k2lim k2lin 5klip 5klop k3lor
+        1kl� 2k1m kmani5e kn8 6kner k2ni kn�8 1k2o ko1a2 ko6de.
+        ko1i koi8t ko6min ko1op ko1or ko6pht ko3ra kor6d5er ko5ru ko5t6sc k3ou
+        3kow 6k5ox 2k1p2 k1q 1kr8 4k3rad 2k1rec 4k3reic kre5ie 2krib 6krig
+        2krip 6kroba 2ks k1sa k6sab ksal8s k8samt k6san k1sc k2s1ex k5spat
+        k5spe k8spil ks6por k1spr kst8 k2s1uf 2k1t kta8l kt5a6re k8tein kte8re
+        k2t1h k8tinf kt3rec kt1s 1ku ku1ch kuck8 k3uhr ku5ie kum2s1 kunfts5
+        kun2s kunst3 ku8rau ku4ro kurz1
+        <!-- % ku1st  -->
+        4kusti ku1ta ku8�
+        6k1v 2k1w ky5n 2k1z 1k� k�4m 4k3�mi k�se5 1k�
+        k�1c k�1s 1k� k�1c k�r6sc
+        <!-- % k�1s  -->
+        1la.
+        8labf 8labh lab2r 2l1abs lach3r la8dr 5ladu 8ladv 6laff laf5t la2gn
+        5laken 8lamb la6mer 5lampe.  2l1amt la1na 1land lan4d3a lan4d3r lan4gr
+        8lanme 6lann 8lanw 6lan� 8lappa lap8pl lap6pr l8ar.  la5ra lar4af
+        la8rag la8ran la6r5a6s l3arbe la8rei 6larm.  la8sa la1sc la8sta lat8i
+        6l5atm 4lauss 4lauto 1law 2lb l8bab l8bauf l8bede l4b3ins l5blo
+        lbst5an lbst3e 8lc l1che l8chert l1chi lch3m l5cho lch5w 6ld l4d3ei
+        ld1re l6d�b le2bl le8bre lecht6s5 led2r 6leff le4gas 1lehr lei6br
+        le8inf 8leinn 5leistu 4lektr le6l5ers lemo2 8lemp l8en.  8lends
+        6lendun le8nend len8erw 6l5ents 4l3entw 4lentz 8lenzy 8leoz 6lepi
+        le6pip 8lepo 1ler l6er.  8lerbs 6l5erde le8reis le8rend le4r3er 4l3erg
+        l8ergr 6lerkl 6l5erzie 8ler� 8lesel lesi5e le3sko le3tha let1s
+        5leuc 4leuro leu4s3t le5xe 6lexp l1f 2l1g lgend8 l8gh lglie3 lglied6
+        6l1h 1li li1ar li1as 2lick li8dr li1en lien6n li8ers li8ert 2lie�
+        3lig li8ga8b li1g6n li1l8a 8limb li1na 4l3indu lings5
+        4l3inh 6linj link4s3 4linkt 2lint 8linv
+        <!-- % lion5s6t  -->
+        4lipp 5lipt 4lisam
+        livi5e 6l1j 6l1k l8keim l8kj lk2l lko8f lkor8 lk2sa lk2se 6ll l1la
+        ll3a4be l8labt ll8anl ll1b ll1c ll1d6 l1le l4l3eim l6l5eise ller3a
+        l4leti l5lip l1lo ll3ort ll5ov ll6spr llte8 l1lu ll3urg l1l�
+        l5l� l6l�b 2l1m l6m5o6d 6ln l1na l1no 8lobl lo6br 3loch.
+        l5o4fen 5loge.  5lohn 4l3ohr 1lok l2on 4l3o4per lo1ra 2l1ord 6lorg
+        4lort lo1ru 1los.  lo8sei 3losig lo6ve lowi5 6l1p lp2f l8pho l8pn
+        lp4s3te l2pt l1q 8l1r 2ls l1sa l6sarm l1sc l8sec l6s5erg l4s3ers l8sh
+        l5s6la l1sp ls4por ls2pu l1str l8suni l1s� 2l1t lt5amp l4t3ein
+        l5ten l6t5eng l6t5erp l4t3hei lt3her l2t1ho l6t5i6b lti1l l8tr�
+        lt1sc lt6ser lt4s3o lt5ums lu8br lu2dr lu1en8 8lu8fe luft3a luf8tr
+        lu6g5r 2luh l1uhr lu5it 5luk 2l1umf 2l1umw 1lun 6l5u6nio 4l3unte lu5ol
+        4lurg 6lurs l3urt lu4sto
+        lus1tr <!-- % lu3str % lat. Trennregeln ? -->
+        lu6st5re lu8su lu6tal lu6t5e6g lu8terg
+        lu3the lu6t5or lu2t1r lu6�5 l1v lve5r6u 2l1w 1ly lya6
+        6lymp ly1no l8zess l8zo8f l3zwei lz5wu 3l�nd l�5on
+        l�6sc l�t1s 5l�uf 2l�ug l�u6s5c l�5v
+        l1�l 1l�s l�1�6t 6l1�be 1ma
+        8mabg ma5chan mad2 ma5el 4magg mag8n ma1la ma8lau mal5d 8malde mali5e
+        malu8 ma8lut 2m1amp 3man mand2 man3ds 8mangr mani5o 8m5anst 6mappa
+        4m3arbe mar8kr ma1r4o mar8schm 3mas ma1sc ma1t� 4m5auf ma5yo 2m1b
+        mb6r 2m1c 2m1d md6s� 1me me1ch me5isc 5meld mel8sa 8memp me5nal
+        men4dr men8schl men8schw 8mentsp me1ra mer4gl me1ro 3mes me6s5ei me1th
+        me8� 2m1f6 2m1g 2m1h 1mi mi1a mi6ale mi1la 2m1imm mi1na
+        mi5n� mi4s3an mit1h mi5t6ra 3mitt mitta8 mi6�5 6mj
+        2m1k8 2m1l 2m1m m6mad m6m5ak m8menth m8mentw mme6ra m2mn mm5sp mm5ums
+        mmut5s m8m�n m1n8 m5ni 1mo mo5ar mo4dr 8mof mo8gal mo4kla mol5d
+        m2on mon8do mo4n3od
+        mon2s1tr <!-- % lat. Trennregeln ? -->
+        mont8a 6m5ony mopa6 mo1ra mor8d5a mo1sc mo1sp 5mot
+        moy5 2mp m1pa mpfa6 mpf3l mphe6 m1pi mpin6 m1pl mp2li m2plu mpo8ste
+        m1pr mpr�5 mp8th mput6 mpu5ts m1p� 8m1q 2m1r 2ms ms5au m1sc
+        msch4l ms6po m3spri m1str 2m1t mt1ar m8tein m2t1h mt6se mt8s�
+        mu5e 6m5uh mumi1 1mun mun6dr muse5e mu1ta 2m1v mvol2 mvoll3 2m1w 1my
+        2m1z m�6kl 1m�n m�1s m�5tr m�u4s3c 3m��
+        m�b2 6m�l 1m� 5m�n 3m�t 1na.
+        n5ab.  8nabn n1abs n1abz na6b� na2c nach3e 3nacht 1nae na5el
+        n1afr 1nag 1n2ah na8ha na8ho 1nai 6nair na4kol n1akt nal1a 8naly 1nama
+        na4mer na1mn n1amp 8n1amt 5nanc nan6ce n1and n6and.  2n1ang 1nani
+        1nann n1ans 8nanw 5napf.  1n2ar.  na2ra 2n1arc n8ard 1nari n8ark
+        6n1arm 5n6ars 2n1art n8arv 6natm nat6s5e 1naue 4nauf n3aug 5naui n5auk
+        na5um 6nausb 6nauto 1nav 2nax 3naz 1na� n1b2 nbau5s n1c
+        nche5e nch5m 2n1d nda8d n2d1ak nd5ans n2d1ei nde8lac ndel6sa n8derhi
+        nde4se nde8stal n2dj ndnis5 n6d5or6t nd3rec nd3rot nd8samt nd6sau
+        ndt1h n8dumd 1ne ne5as ne2bl 6n5ebn 2nec 5neei ne5en ne1g4l 2negy
+        4n1ein 8neis 4n3e4lem 8nemb 2n1emp nen1a 6n5energ nen3k 8nentb
+        4n3en3th 8nentl 8n5entn 8n5ents ne1ra ne5r8al ne8ras 8nerbi 6n5erde.
+        nere5i6d nerfor6 6n5erh� 8nerl� 2n1err n8ers.  6n5ertra
+        2n1erz nesi3e net1h neu4ra neu5sc 8neu� n1f nf5f nf2l
+        nflei8 nf5lin nft8st n8g5ac ng5d ng8en nge8ram ngg2 ng1h n6glic ng3rip
+        ng8ru ng2se4 ng2si n2g1um n1gy n8g�l n1h nhe6r5e 1ni ni1bl
+        ni5ch� ni8dee n6ie ni1en nie6s5te niet5h ni8etn 4n3i6gel n6ik
+        ni1la 2n1imp ni5na 2n1ind 8ninf 6n5inh ni8nit 6n5inn 2n1ins 4n1int
+        n6is
+        nis1tr <!-- % ni3str % lat. Trennregeln ? -->
+        ni1th ni1tr n1j n6ji n8kad nk5ans n1ke n8kerla n1ki nk5inh
+        n5kl� n1k2n n8k5not nk3rot n8kr� nk5spo nk6t5r n8kuh
+        n6k�b n5l6 nli4mi n1m nmen4s n1na n8nerg nni5o n1no nn4t3ak nnt1h
+        nnu1e n1ny n1n� n1n� n1n� no5a no4b3la 4n3obs 2nobt
+        noche8 no6die no4dis no8ia no5isc 6n5o6leu no4mal noni6er 2n1onk n1ony
+        4n3o4per 6nopf 6nopti no3ra no4ram nor6da 4n1org 2n1ort n6os no1st
+        8nost.  no8tan no8ter noty6pe 6n5ox n1p2 n1q n1r nr�s3 6ns n1sac
+        ns3ang n1sc n8self n8s5erf n8serg n6serk ns5erw n8sint n1s2pe n1spr
+        n6s5tat.
+        <!-- % n5s6te. -->
+        n6stob n1str n1ta n4t3a4go nt5anh nt3ark nt3art
+        n1te nt3eis nte5n6ar nte8nei nter3a nte6rei nt1ha nt6har n3ther nt5hie
+        n3thus n1ti nti1c n8tinh nti1t ntlo6b ntmen8 n1to nt3o4ti n1tr ntra5f
+        ntra5ut nt8rea nt3rec nt8rep n4t3rin nt8rop n4t3rot n4tr� nt1s
+        nts6an nt2sk n1tu nt1z n1t� n1t� n8t�l n1t� 1nu
+        nu1a nu5el nu5en 4n1uhr nu5ie 8numl 6n5ums 6n5umw 2n1und 6nuni 6n5unr
+        2n1unt 2nup 2nu6r n5uri nu3skr nu5ta n1v 8n1w 1nys n1za n6zab n2z1ar
+        n6zaus nzi4ga n8zof n6z5unt n1zw n6zwir 1n�c 5n�e 5n�i
+        n8�l n�6m n�6re n5�rz 5n�us n1�l
+        1n�t n5�z 5n�. 6n1�2b 5n��
+        o5ab.  oa2l o8ala o1a2m o1an ob1ac obe4ra o6berh 5o4bers o4beru
+        obe6ser 1obj o1bl o2bli ob5sk 3obst.  ob8sta obst5re ob5sz o1che
+        oche8b o8chec o3chi och1l och3m ocho8f o3chro och3to o3chu och1w o1d
+        o2d1ag od2dr ode5i ode6n5e od1tr o5e6b o5e6der.  oe8du o1ef o1e2l
+        o1e2p o1er.  o5e8x o1fa of8fan 1offi of8fin of6f5la o5fla o1fr 8o1g
+        og2n o1ha o1he o6h5eis o1hi ohl1a oh1le oh4l3er 5ohm.  oh2ni o1ho
+        oh1re oh1ru o1hu oh1w o1hy o1h� o5ia o1id.  o8idi oi8dr o5ids
+        o5isch.  oiset6 o1ism o3ist.  o5i6tu o1j o1k ok2l ok3lau o8kl�
+        1okta o1la old5am old5r o1le ole5in ole1r ole3u ol6gl ol2kl olk4s1
+        ol8lak ol8lauf.  ol6lel ol8less o1lo
+        ol1s ol2ster
+        ol6sk o1lu oly1e2 5olym
+        o2mab om6an o8mau ombe4 o8merz om5sp o1mu o8munt o1m� o1m�
+        o1na ona8m on1ax on8ent o6n5erb 8oni oni5er.  on1k on6n5a6b o1no ono1c
+        o4nokt 1ons onts8 o1n� oo8f 1oog oo2pe oo2sa o1pa 3o4pera o3pfli
+        opf3lo opf3r o1pi o1pl o2pli o5p6n op8pa op6pl o1pr o3p4ter 1opti
+        o1p� o5p� o1q o1ra.  o3rad o8radd 1oram o6rang o5ras o8rauf
+        or5cha or4d3a4m or8dei or8deu 1ordn or4dos o1re o5re.  ore2h o8r5ein
+        ore5isc or6enn or8fla or8fli 1orga 5orgel.  or2gl o1ri 5o6rient or8nan
+        or8n� o1ro or1r2h or6t5an or8tau or8tere o1rus o1ry o1r�
+        or1�2 o1sa osa3i 6ose o8serk o1sk o6ske o6ski os2kl os2ko os2kr
+        osni5e o2s1o2d o3s4per o4stam o6stau o3stra ost3re osu6 o6s5ur o5s6ze
+        o1ta ot3auf o6taus o1te o6terw o1th othe5u o2th1r o1ti o1to oto1a
+        ot1re o1tri o1tro ot1sc o3tsu ot6t5erg ot2t3h ot2t5r ot8t� o1tu
+        ou3e ouf1 ou5f6l o5u6gr ou5ie ou6rar ou1t6a o1v o1wa o1we o6wer.  o1wi
+        owid6 o1wo o5wu o1xe oy5al.  oy1e oy1i o5yo o1z oza2r 1o2zea ozo3is
+        o�8 o�5elt o�1t 3paa pa6ce 5pad pag2 1pak
+        pa1la pa8na8t pani5el pa4nor pan1s2 1pap pap8s pa8rei par8kr paro8n
+        par5o6ti part8e 5partei 3partn pas6sep pa4tha 1pau 6paug pau3sc p1b
+        8p5c 4p1d 1pe 4peic pe5isc 2pek pen3k pen8to8 p8er pe1ra pere6 per5ea
+        per5eb pe4rem 2perr per8ran 3pers 4persi pe3r� pe4sta pet2s
+        p2f1ec p4fei pf1f pf2l 5pflanz pf8leg pf3lei 2pft pf3ta p1g 1ph 2ph.
+        2p1haf 6phb 8phd 6p5heit ph5eme 6phg phi6e 8phk 6phn p5holl pht2
+        ph3tha 4ph3the phu6 6phz pi1en pi5err pi1la pi1na 5pinse pioni8e 1pis
+        pi1s2k pi1th p1k pl8 5pla p2lau 4plei p3lein 2pler 6p5les 2plig p6lik
+        6p5ling p2liz plo8min 6p1m p1n 1p2o 8poh 5pol po8lan poly1 po3ny po1ra
+        2porn por4t3h po5r� 5poti p1pa p6p5ei ppe6la pp5f p2p1h p1pi pp1l
+        ppp6 pp5ren
+        pp1s pp2ste
+        p5p� pr6 3preis 1pres 2p3rig 5prinz 1prob 1prod
+        5prog pro8pt pro6t5a prote5i 8pro� pr�3l 1pr�s
+        pr�te4 1pr�f p5schl 2pst 1p2sy p1t p8to8d pt1s 5p6ty 1pu
+        pu1b2 2puc pu2dr puf8fr 6p5uh pun8s pu8rei pu5s6h pu1ta p1v p3w 5py
+        py5l p1z p�6der p5�6m p�8nu 8p�r p�t5h
+        p�t1s qu6 1qui 8rabk ra6bla 3rable ra2br r1abt 6rabz ra4dan ra2dr
+        5rafal ra4f3er ra5gla ra2g3n 6raha ral5am 5rald 4ralg ra8lins 2rall
+        ral5t 8ramei r3anal r6and ran8der ran4dr 8ranf 6ranga 5rangi ran8gli
+        r3angr rans5pa 8ranw r8anz.  ra5or 6rapf ra5pl rap6s5er 2r1arb 1rarh
+        r1arm ra5ro 2r1art 6r1arz ra8tei ra6t5he 6ratl ra4t3ro r5atta raue4n
+        6raus.  r5austa rau8tel raut5s ray1 r1b rb5lass r6bler rb4lie rbon6n
+        r8brecht rb6s5t� r8ces r1che rch1l rch3m rch3re rch3tr rch1w 8rd
+        r1da r8dachs r8dap rda5ro rde5ins rdio5 r8dir rd3ost r1dr r8drau 1re.
+        re1ak 3reakt re3als re6am.  re1as 4reben re6bl rech5a r8edi re3er
+        8reff 3refl 2reh 5reha r4ei.  reich6s5 8reier 6reign re5imp 4r3eina
+        6r3einb 6reing 6r5einn 6reinr 4r3eins r3eint reli3e 8r5elt 6rempf
+        2remt ren5a6b ren8gl r3enni 1reno 5rente 4r3enth 8rentl 4r3entw 8rentz
+        ren4zw re1on requi5 1rer rer4bl 6rerbs 4r3erd 8rerh� 8rerkl
+        4r3erla 8rerl� 4r3erns 6r5ern� rer5o 6r5erreg r5ertr r5erwec
+        r5er� re2sa re8schm 2ress re5u8ni 6rewo 2r1ex r1f r8ferd rf4lie
+        8r1g r8gah rge4bl rge5na rgest4 rg6ne r2gni2 r8gob r4g3ret rg8sel r1h8
+        r2hy 5rhyt ri1ar ri5cha rid2g r2ie rieg4s5 ri8ei ri1el ri6ele ri1en
+        ri3er.  ri5ers.  ri6fan ri8fer ri8fr 1r2ig ri8kn ri5la rim�8
+        ri1na r8inde rin4ga rin6gr 1rinn 6rinner rino1 r8insp 4rinst
+        ri1n� ri5o6ch ri1o2d ri3o6st 2r1ir r2is ri3sko ri8spr
+        <!-- % ri8st� -->
+        ri5sv r2it 6r5i6tal ri5tr ri6ve.  8r1j 6rk r1ke rkehrs5 r1ki r3klin
+        r1k2n rk3str rk4t3an rk6to r6kuh rk�4s3t r1l r5li rline5a 6r1m
+        r6manl rma4p r4m3aph r8minf r8mob rm5sa 2rn r1na rna8be r5ne rn2ei
+        r6neif r6nex r6nh rn1k r1no r6n5oc rn1sp r1n� r1n� ro6bern
+        6robs ro1ch 3rock.  ro5de ro1e 4rofe ro8hert 1rohr ro5id ro1in ro5isc
+        6rolym r2on 6roog ro6phan r3ort ro1s2p ro5s6w ro4tau ro1tr ro6ts 5rout
+        r1p rpe8re rp2f r2ps r2pt r1q 2rr r1ra r1re rrer6
+        rr6hos r5rh�
+        r1ri r1ro rro8f rr8or rror5a r1ru r3ry r1r� r1r� r1r�
+        2r1s
+        r2ste r2sti <!-- % aber: 3ste... s.u.! -->
+        r6sab r4sanf rse6e rse5na r2sh r6ska r6ski rs2kl r8sko r2sl rs2p
+        r6stauf r8sterw r8stran rswi3d4 r2sz 2r1t rt3art r8taut r5tei rt5eige
+        r8tepe r4t3erh r8terla r4t3hei r5t6hu r4t3int rt5reif rt1sc rt6ser
+        rt6s5o rt6s5u rt5und r8turt rube6 ru1en 1r4uf ruf4st ru1ie 2r1umg
+        2r1uml 2rums run8der run4d5r 6rundz 6runf 8runs 2r1unt 2r1ur r6us
+        ru6sta
+        rus1tr <!-- % ru3str % lat. Trennregeln ? -->
+        ru6tr 1ruts r1v rven1 rvi2c r1w r1x r1za rz5ac r6z5al
+        r8z1ar r8zerd r6z5erf rz8erh rz4t3h r8zum r�4ste r�u8sc
+        r1�f 5r�hr r�5le 3r�ll 5r�mis r1�r
+        r�2sc 3r�mp 1sa.  1saa s3a4ben sa2bl 2s1abs 6s1abt 6sabw
+        3sack.  6s3a4der 1saf sa1fa 4s1aff sa5fr 1sag 1sai sa1i2k1 4s1akt 1sal
+        sa1la 4s3alpi 6salter salz3a 1sam s5anb san2c 1sand s5angeh 6sanl
+        2s1ans 6s3antr 8s1anw s1ap s6aph 8sapo sap5p6 s8ar.  2s1arb 3sarg
+        s1arm sa5ro 2s1art 6s1arz 1sas 1sat sat8a 2s1atl sa8tom 3s8aue s5auff
+        sau5i s6aur 2s1aus 5s6ause 2s1b2 2sca s4ce 8sch.  3scha.  5schade
+        3schaf 3schal sch5ame 8schanc 8schb 1sche 6schef 8schex 2schf 2schg
+        2schh 1schi 2schk 5schlag 5schlu 6schm��
+        6schna� 1scho 6schord 6schp 3schri 8schric 8schrig
+        8schrou 6schs 2scht sch3ta sch3tr 1schu 8schunt 6schv 2schz 5sch�
+        5sch� 2sco scre6 6scu 2s1d 1se se5an se1ap se6ben se5ec see5i6g
+        se3erl 8seff se6han se8hi se8h� 6s5eid.  2s1eig s8eil 5sein.
+        sei5n6e 6s5einh 3s8eit 3sel.  se4lar selb4 6s3e4lem se8lerl 2s1emp
+        sen3ac se5nec 6s5ents 4sentz s8er.  se8reim ser5inn 8serm�
+        8s5erzi 6ser�f se1um 8sexa 6sexp 2s1f2 sfal8ler 2s3g2 sge5b2 s1h
+        s8hew 5s6hip 5s4hop 1si 2siat si1b sicht6s 6s5i6dee siege6s5 si1en
+        si5err si1f2 si1g2n si6g5r si8kau sik1i si4kin si2kl si8k� si1la
+        sil6br si1na 2s1inf sin5gh 2s1inh sinne6s5 2s1ins si5ru si5str 4s1j
+        s1k2 6sk.  2skau skel6c skelch5 s6kele 1s2ki.  3s4kin.  s6kiz s8kj
+        6skn 2skow 3skrib 3skrip 2sku 8sk� s1l s8lal slei3t s4low 2s1m
+        s1n 6sna 6snot 1so so1ch 2s1odo so4dor 6s5o4fen solo3 s2on so5of 4sope
+        so1ra 2s1ord 4sorga sou5c so3un 4s3ox sp2 8spaa 5spal 1span 2spap
+        s2pec s4peis 1spek s6perg 4spers s6pes 2s1pf 8sphi 1s2ph� 1spi
+        spi4e 6s5pig 6spinse 2spis 2spla 2spol 5s6pom 6s5pos 6spoti 1spra
+        3s8prec 6spreis 5spring 6sprob 1spru s2pul 1s2pur 6spy 5sp�n
+        1sp� s1q 2s1r
+        <!-- % 2s1s2 -->
+        <!-- % 2s1sa 2s1se 2s1si 2s1so 2s1s� 2s1s� 2s1s� 2s1sch  -->
+        2ssa 2sse 2ssi 2sso 2ss� 2ss� 2ss� 2s1sch
+        sse8nu ssini6s ssoi6r 2st.
+        1sta 4stafe 2stag
+        sta3la 6stale
+        4s2talg
+        8stalk 8stamt 6st5anf 4stans 6stanw 6starb sta4te
+        6staus 2stb 6stc 6std
+        s1te <!-- % 1ste  -->
+        4steil
+        <!-- % 8stemb % ueberfluessig ? -->
+        6steppi
+        <!-- % 8stese % ueberfluessig ? -->
+        8stesse 6stf 2stg 2sth st1ha st3hei s8t1hi st1ho st5hu
+        s1ti <!-- % 1sti  -->
+        s2ti4el <!-- % sti4el -->
+        4s2tigm
+        <!-- % sti3na  -->
+        6s2tind <!-- % 6stind  -->
+        4s2tinf
+        s2ti8r
+        2stk 2stl 2stm
+        1sto 6stoll.  4st3ope
+        6stopf. 6stord 6stp
+        <!-- % 5stra. % ueberfluessig ? -->
+        4strai
+        s3tral <!-- % 3s4tral % lat. Trennregeln ? -->
+        6s5traum 3stra�
+        3strec 6s3tref 8streib 5streif 6streno 6stres 6strev
+        <!-- % 5s6tria % lat. Trennregeln ? -->
+        2st5rig
+        <!-- % 5strik % ueberfluessig ? -->
+        8s2t1ris <!-- % 8strisi -->
+        <!-- % 3s4troa % ueberfluessig ? -->
+        s8troma st5rose 4struf 3strum
+        6str�g 2st1s6 2stt
+        1stu stu5a 4stuc 2stue 8stun.  2stv 2stw s2tyl
+        6stz 1st� 8st�g
+        1st�
+        1st� 8st�ch 4st�r.
+        1su su2b1 3suc su1e su2fe su8mar 6sumfa 8sumk 2s1unt sup1p2 6s5u6ran
+        6surte 2s1v 2s1w 1sy 8syl.  sy5la syn1 sy2na syne4 s1z s4zend 5s6zene.
+        8szu 1s� 6s5�nd 6s�ugi 6s�u�
+        5s�m 2s1�2b 1s�c s�8di 1s�n 5s��
+        taats3 4tab.  taba6k ta8ban tab2l ta6bre 4tabs t3absc
+        8tabz 6t3acht ta6der 6tadr tad6s tad2t 1tafe4 1tag ta6ga6 ta8gei
+        tage4s tag6s5t tah8 tahl3 tai6ne.  ta5ir.  tak8ta tal3au 1tale ta8leng
+        tal5ert 6t5a6mer 6tamp tampe6 2t1amt tan5d6a tan8dr tands5a tani5e
+        6tanl 2tanr t3ans 8t5antr tanu6 t5anw 8tanwa tan8zw ta8rau 6tarbe
+        1tari 2tark 2t1arm ta1ro 2tart t3arti 6tarz ta1sc ta6sien ta8stem
+        ta8sto t5aufb 4taufn 8taus.  5tause 8tausf 6tausg t5ausl 2t1b2 2t1c
+        t6chu 2t1d te2am tea4s te8ben 5techn 4teff te4g3re te6hau 2tehe te4hel
+        2t1ehr te5id.  teig5l 6teign tei8gr 1teil 4teinh t5einhe 4teis t5eisen
+        8teiw te8lam te4lar 4telek 8telem te6man te6n5ag ten8erw ten5k tens4p
+        ten8tro 4t3entw 8tentz te6pli 5teppi ter5a6b te3ral ter5au 8terbar
+        t5erbe.  6terben 8terbs 4t3erbt t5erde.  ter5ebe ter5ein te8rers terf4
+        8terh� 6terkl� ter8nor ter6re.  t8erscha t5e6sel te8stau
+        t3euro te1xa tex3e 8texp tex6ta 2t1f2 2t1g2 2th.  th6a 5tha.  2thaa
+        6t1hab 6t5haf t5hah 8thak 3thal.  6thals 6t3hand 2t1hau 1the.  3t4hea
+        t1heb t5heil t3heit t3helf 1theo 5therap 5therf 6t5herz 1thes 1thet
+        5thi.  2t1hil t3him 8thir 3this t5hj 2th1l 2th1m th1n t5hob t5hof
+        4tholz 6thopti 1thr6 4ths t1hum 1thy 4t1h� 2t1h� t1h�
+        ti1a2m ti1b tie6fer ti1en ti8gerz tig3l ti8kin ti5lat 1tilg t1ind
+        tin4k3l ti3spa ti5str 5tite ti5tr ti8vel ti8vr 2t1j 2t1k2 2t1l tl8a
+        2t1m8 2t1n 3tobe 8tobj to3cha 5tocht 8tock tode4 to8del to8du to1e
+        6t5o6fen to1in toi6r 5toll.  to8mene t2ons 2t1ony to4per 5topf.  6topt
+        to1ra
+        to1s to2ste
+        to6ska tos2l 2toti to1tr t8ou 2t1p2 6t1q tr6 tra5cha
+        tra8far traf5t 1trag tra6gl tra6gr t3rahm 1trai t6rans tra3sc tra6st
+        3traue t4re.  2trec t3rech t8reck 6t1red t8ree 4t1reg 3treib 4treif
+        8t3reis 8trepo tre6t5r t3rev 4t3rez 1trib t6rick tri6er 2trig t8rink
+        tri6o5d trizi5 tro1a 3troc trocke6 troi8d tro8man.  tro3ny 5tropf
+        6t5rosa t5ro� 5trub 5trup trut5 1tr�g 6t1r�h
+        5tr�b tr�3bu t1r�c t1r�s 2ts ts1ab t1sac tsa8d
+        ts1ak t6s5alt ts1an ts1ar ts3auf t3schr t5sch� tse6e tsee5i
+        tsein6s ts3ent ts1er t8serf t4serk t8sh 5t6sik t4s3int ts5ort.
+        t5s6por t6sprei
+        t1st t2ste
+        t6s5tanz ts1th t6stit t4s3tor 1t2sua t2s1uf
+        t8sum.  t2s1u8n t2s1ur 2t1t tt5eif tte6sa tt1ha tt8ret tt1sc tt8ser
+        tt5s6z 1tuc tuch5a 1tu1e 6tuh t5uhr tu1i tu6it 1tumh 6t5umr 1tums
+        8tumt 6tund 6tunf 2t1unt tu5ra tu6rau tu6re.  tu4r3er 2t1v 2t1w 1ty1
+        ty6a ty8la 8tym 6ty6o 2tz tz5al tz1an tz1ar t8zec tzeh6 tzehn5 t6z5ei.
+        t6zor t4z3um t6z�u 5t�g 6t�h t5�lt t8�n
+        t�re8 8t�8st 6t�u� t5�ffen
+        8t�8k 1t�n 4t�b t6�5ber. 5t�ch 1t�r.
+        u3al.  u5alb u5alf u3alh u5alk u3alp u3an.  ua5na u3and u5ans u5ar.
+        ua6th u1au ua1y u2bab ubi5er.  u6b5rit ubs2k u5b� u8b�b 2uc
+        u1che u6ch5ec u1chi uch1l uch3m uch5n uch1r uch5to ucht5re u1chu uch1w
+        uck1a uck5in u1d ud4a u1ei u6ela uene8 u6ep u1er uer1a ue8rerl uer5o
+        u8esc u2est u8ev u1fa u2f1ei u4f3ent u8ferh uf1fr uf1l uf1ra uf1re
+        uf1r� uf1r� uf1s2p uf1st uft1s u8gabt u8gad u6gap ugeb8 u8gn
+        ugo3s4 u1ha u1he u1hi uh1le u1ho uh1re u1hu uh1w u1h� u1h�
+        6ui ui5en u1ig u3ins uin8tes u5isch.  u1j 6uk u1ke u1ki u1kl u8klu
+        u1k6n u5ky u1la uld8se u1le ul8lac ul6lau ul6le6l ul6lo ulni8 u1lo
+        ulo6i ult6a ult8e u1lu ul2vr u1l� u1l� 3umfan 5umlau umo8f
+        um8pho u1mu umu8s u5m� u1n1a un2al un6at unau2 6und.  5undein
+        un4d3um 3undzw und�8 un8d�b une2b un1ec une2h un3eis 3unfal
+        1unf� 5ungea 3ungl� ung2s1 un8g� 1u2nif un4it un8kro
+        unk5s u1no unpa2 uns2p unvol4 unvoll5 u5os.  u1pa u1pi u1p2l u1pr
+        up4s3t up2t1a u1q u1ra ur5abs ura8d ur5ah u6rak ur3alt u6rana u6r5ans
+        u8rap ur5a6ri u8ratt u1re ur3eig ur8gri u1ri ur5ins 3urlau urmen6
+        ur8nan u1ro 3ursac ur8sau ur8sei ur4sk 3urtei u1ru uru5i6 uru6r u1ry
+        ur2za ur6z� ur5�6m u5r� u1r� ur�ck3 u1sa
+        usa4gi u2s1ar u2s1au u8schec usch5wi u2s1ei use8kel u8sl u4st3a4b
+        us3tau
+        <!-- % u3s4ter  -->
+        u2s1uf u8surn ut1ac u1tal uta8m u1tan ut1ar u1tas ut1au
+        u1te u8teic u4tent u8terf u6terin u4t3hei ut5ho ut1hu u1ti utine5
+        uti6q u1to uto5c u1tr ut1sa ut1s6p ut6stro u1tu utz5w u1u u1v uve5n
+        uve3r4� u1w u1xe u5ya uy5e6 u1yi u2z1eh u8zerh u5� u�e6n
+        u�en5e 8vanb 6vang 6varb var8d va6t5a va8tei
+        va2t1r 2v1b 6v5c 6vd 1ve 6ve5g6 ver1 ver5b verb8l ve2re2 verg8 ve2ru8
+        ve1s ve2s3p ve3xe 2v1f 2v1g 6v5h vi6el vie6w5 vi1g4 vi8leh vil6le.
+        8vint vi1ru vi1tr 2v1k 2v1l 2v1m 4v5n 8vo8f voi6le vol8lend vol8li
+        v2or1 vo2re vo8rin vo2ro 2v1p 8vra v6re
+        2v2s <!-- % 2v1s  -->
+        2v1t 2v1v 4v3w 2v1z
+        waffe8 wa6g5n 1wah wah8n wa5la wal8din wal6ta wan4dr 5ware wa8ru
+        war4za 1was w5c w1d 5wech we6fl 1weg we8geng weg5h weg3l we2g1r
+        weh6r5er 5weise weit3r wel2t welt3r we6rat 8werc 5werdu wer4fl 5werk.
+        wer4ka wer8ku wer4ta wer8term we2sp
+        we8s4tend
+        <!-- % we6s4teu  -->
+        we8str
+        we8st� wet8ta wich6s5t 1wid wi2dr wiede4 wieder5 wik6 wim6ma
+        win4d3r 5wirt wisch5l 1wj 6wk 2w1l 8w1n wo1c woche6 wol6f wor6t5r 6ws2
+        w1sk 6w5t 5wunde.  wun6gr wu1sc wu2t1 6w5w wy5a w�rme5 w�1sc
+        1xag x1ak x3a4men 8xamt x1an 8x1b x1c 1xe.  x3e4g 1xen xe1ro x1erz
+        1xes 8xf x1g 8x1h 1xi 8xid xi8so 4xiste x1k 6x1l x1m 8xn 1xo 8x5o6d
+        8x3p2 x1r x1s6 8x1t x6tak x8terf x2t1h 1xu xu1e x5ul 6x3w x1z 5ya.
+        y5an.  y5ank y1b y1c y6cha y4chia y1d yen6n y5ern y1g y5h y5in y1j
+        y1k2 y1lak yl1al yla8m y5lax y1le y1lo y5lu y8mn ym1p2 y3mu y1na yno2d
+        yn1t y1on.  y1o4p y5ou ypo1 y1pr y8ps y1r yri3e yr1r2
+        <!-- % y1s -->
+        ys5iat ys8ty
+        y1t y3w y1z y�8m z5a6b zab5l 8za6d 1zah za5is 4z3ak 6z1am 5zange.
+        8zanl 2z1ara 6z5as z5auf 3zaun 2z1b 6z1c 6z1d 1ze ze4dik 4z3eff 8zein
+        zei4ta zei8ters ze6la ze8lec zel8th 4zemp 6z5engel zen8zin 8zerg�
+        zer8i ze1ro zers8 zerta8 zer8tab zer8tag 8zerz ze8ste zeu6gr 2z1ex
+        2z1f8 z1g 4z1h 1zi zi1en zi5es.  4z3imp zi1na 6z5inf 6z5inni zin6s5er
+        8zinsuf zist5r zi5th zi1tr 6z1j 2z1k 2z1l 2z1m 6z1n 1zo zo6gl 4z3oh
+        zo1on zor6na8 4z1p z5q 6z1r 2z1s8 2z1t z4t3end z4t3hei z8thi 1zu zu3al
+        zu1b4 zu1f2 6z5uhr zun2a 8zunem zunf8 8zungl zu1o zup8fi zu1s8 zu1z
+        2z1v zw8 z1wal 5zweck zwei3s z1wel z1wer z6werg 8z5wes 1zwi zwi1s
+        6z1wo 1zy 2z1z zz8a zzi1s 1z� 1z� 6z�l. z�1le
+        1z� 2z1�2b �1a6 �b1l �1che �3chi
+        �ch8sc �ch8sp �5chu �ck5a �d1a �d5era
+        �6d5ia �1e �5fa �f1l �ft6s �g1h
+        �g3le �6g5nan �g5str �1he �1hi �h1le
+        �h5ne 1�hnl �h1re �h5ri �h1ru �1hu
+        �h1w 6�i �1isc �6ische �5ism �5j
+        �1k �l1c �1le �8lei �l6schl �mi1e
+        �m8n �m8s �5na 5�nderu �ne5i8 �ng3l
+        �nk5l �1no �n6s5c �1pa �p6s5c 3�q
+        �r1c �1re �re8m 5�rgern �r6gl �1ri
+        3�rmel �1ro �rt6s5 �1ru 3�rztl �5r�
+        �6s5chen �sen8s �s1th �ta8b �1te �teri4
+        �ter5it �6thy �1ti 3�tk �1to �t8schl
+        �ts1p �5tu �ub1l �u1e 1�ug �u8ga
+        �u5i �1um. �1us. 1�u� �1z
+        �1b �1che �5chi
+        �ch8s2tei <!-- % �ch8stei  -->
+        �ch8str �cht6
+        5�6dem 5�ffn �1he �h1l8 �h1re �1hu
+        �1is �1ke 1�2ko 1�l. �l6k5l �l8pl
+        �1mu �5na �nig6s3 �1no �5o6t �pf3l
+        �p6s5c �1re �r8gli �1ri �r8tr �1ru
+        5�sterr �1te �5th �1ti �1tu �1v �1w
+        �we8 �2z �b6e2 3�4ber1 �b1l �b1r
+        5�2bu �1che �1chi �8ch3l �ch6s5c �8ck
+        �ck1a �ck5ers �d1a2 �6deu �di8t �2d1o4
+        �d5s6 �ge4l5a �g1l �h5a �1he �8heh
+        �6h5erk �h1le �h1re �h1ru �1hu �h1w
+        �3k �1le �l4l5a �l8lo �l4ps �l6s5c
+        �1lu �n8da �n8fei �nk5l �n8za �n6zw
+        �5pi �1re �8rei �r8fl �r8fr �r8geng
+        �1ri �1ro �r8sta
+        <!-- % �r8ster  -->
+        �1ru �se8n
+        �8sta �8stes
+        <!-- % �6s5tete -->
+        �3ta �1te �1ti
+        �t8tr �1tu �t8zei �1v �1a8 5�a.
+        �8as �1b8 �1c �1d
+        1�e �5ec 8�e8g 8�e8h
+        2�1ei 8�em �1f8 �1g �1h
+        1�i �1k �1l �1m
+        <!-- % �mana8 % ueberfluessig ? -->
+        �1n �1o �1p8 �5q
+        �1r �1s2 �st8 �1ta
+        �1te �t3hei �1ti �5to
+        �1tr 1�u8 6�5um �1v �1w
+        �1z
+        <!-- % st -->
+        2s1ta.
+        i2s1tal
+        2s1tani 2s1tan.
+        fe2s1ta
+        te2s1ta
+
+        nd2ste
+        ve2ste
+        3s2tec
+        4s3techn
+        3s2teg
+        3s2teh
+        3s2tein 3s2teig 3s2teif
+        3s2tell 3s2telz
+        a4s3tel
+        3s2temm
+        3s2temp
+        3s2tep
+        s3s2ter t3s2tern
+        3s2teue
+        6s4teuro
+
+        bs2ti
+        te2s3ti
+        ve2sti
+        3s2tic
+        <!-- % 3s2tiefel -->
+        3s2tieb
+        3s2tieg
+        <!-- % 3s2tiel s.o. -->
+        3s2tif
+        3s2til
+        3s2tim
+        3s2tink
+        3s2titu
+        <!-- % -->
+        a2s1to
+        gu2s1to <!-- % R28 -->
+        ku2s1to <!-- % R28 -->
+        i2s1tol i2s1tor
+        ve2s1to
+        <!-- % -->
+        2s1tung
+        2s7tus
+        o2s1tul <!-- % R27 -->
+        <!-- % -->
+        <!-- % eszet -->
+        <!-- % -->
+        aus3s4
+        ens3s4
+        gs3s4
+        .mis2s1
+        s2s1b8 <!-- % ss1b8 %�1b8 -->
+        <!-- % �1c -->
+        s2s3chen
+        s2s3d <!-- % �1d  -->
+        s2s5ec <!-- % �5ec -->
+        <!-- % 8sse8g % 8�e8g -->
+        <!-- % 8sse8h % 8�e8h -->
+        2s2s1ei <!-- % 2�1ei  -->
+        s2s3f <!-- % ss1f % �1f8 -->
+        s2s1g <!-- % ss1g % �1g  -->
+        s2s3h <!-- %�1h  -->
+        s2s3k <!-- % �1k  -->
+        s2s3l <!-- % �1l  -->
+        s2s3m <!-- % �1m -->
+        <!-- % ssmana8 % �mana8 % ueberfluessig ? -->
+        s2s3n <!-- % �1n  -->
+        s2s3p8 <!-- % �1p8  -->
+        s2s5q <!-- % �5q  -->
+        s2s3r <!-- % �1r  -->
+        s2s3s2 <!-- % �1s2  -->
+        sss2t8 <!-- % �st8  -->
+        <!-- % �1ta -->
+        <!-- % �1te -->
+        as2s3te
+        is2s3te
+        us2s3te
+        �s2s3te
+        s2st3hei <!-- % �t3hei  -->
+        s2s3ti <!-- % �1ti -->
+        s2s1to <!-- % �5to -->
+        s2s1tr <!-- % �1tr  -->
+        <!-- % aber: ...s-sung -->
+        6ss5um <!-- % 6�5um -->
+        s2s3v <!-- % �1v  -->
+        s2s3w <!-- % �1w  -->
+        s2s3z <!-- % ss1z �1z  -->
+        <!-- % -->
+        <!-- % ck -->
+        <!-- % -->
+        1cker.
+        1ckert
+        1ckad
+        1cke.
+        1ckel
+        1cken
+        4ck1ent
+        1ckere
+        1ckern
+        1ckeru
+        1ckie
+        1ckig
+        1ckun
+        <!--
+        }
+        \endgroup
+        \relax\endinput
+        %
+        %
+        % =============== Additional Documentation ===============
+        %
+        %
+        % Necessary Settings in TeX macro files:
+        %
+        % \lefthyphenmin, \righthyphenmin:
+        %   You can set both parameters to 2.
+        %   The new German orthography allows hyphenation after the first
+        %   letter, i.e. \lefthyphenmin=1.  Despite being ugly, this may
+        %   ease line breaking in narrow columns.  However, the current
+        %   patterns do *** NOT *** support this!
+        %
+        % \lccode <char>:
+        %   To get correct hyphenation points within words containing
+        %   umlauts or \ss, it's necessary to assign values > 0 to the
+        %   appropriate \lccode <char> positions.
+        %   These changes are _not_ done when reading this file and have to
+        %   be included in the language switching mechanism as is done in,
+        %   for example, `german.sty' (\lccode change for ^^Y = \ss in OT1,
+        %   \left-/\righthyphenmin settings).
+        %
+        %
+        % Warum ist diese Version "vorlaeufig" ?
+        %
+        % Diese Trennmuster fuer die neue deutsche Rechtschreibung
+        % entstanden durch manuelle Ueberarbeitung der Muster fuer die
+        % traditionellen Schreibregeln.  Sie sind deshalb statistisch
+        % nicht optimal.
+        %
+        % Die neuerdings erlaubten Trennstellen nach dem ersten Buchstaben
+        % eines Wortes fehlen hier; ihre Implementierung wuerde eine
+        % Neuberechnung der Muster erfordern.
+        %
+        % Wo die neue Rechtschreibung sowohl die "traditionelle", also
+        % etymologisch richtige Trennung, als auch eine Trennung nach
+        % Sprechsilben erlaubt, wird weiterhin "traditionell" getrennt.
+        % (Beachte:  In Woertern lateinischen Ursprungs wird "str" jetzt
+        % als "s-tr" getrennt, ausser natuerlich am Anfang eines Wortstamms:
+        % "Demons-tration", "Aus-tralien" usw., aber "de-struktiv".)
+        %
+        % Sobald Trennmuster fuer die neue deutsche Rechtschreibung
+        % verfuegbar sind, die von Grund auf neu berechnet wurden, sollten
+        % die vorliegenden Muster nicht laenger verwendet werden.
+        %
+        %
+        % Versionsgeschichte
+        %
+        % 1998-02-15  beta 19  gnhyph01.tex
+        % 1998-02-24  beta 20
+        % 1998-04-28       21
+        % 1998-05-28       22
+        % 1998-08-23       26
+        % 1999-03-08       28  Umbenennung `gnhyph01' in `dehyphn',
+        %                      korrigierte Makros aus `dehypht',
+        %                      Lockerung des Copyright auf die Bedingungen
+        %                      der `LaTeX Project Public License'
+        %
+        %
+        %% \CharacterTable
+        %%  {Upper-case    \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
+        %%   Lower-case    \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
+        %%   Digits        \1\2\3\4\5\6\7\8\9\0
+        %%   Exclamation   \!     Double quote  \"     Hash (number) \#
+        %%   Dollar        \$     Percent       \%     Ampersand     \&
+        %%   Acute accent  \'     Left paren    \(     Right paren   \)
+        %%   Asterisk      \*     Plus          \+     Comma         \,
+        %%   Minus         \-     Point         \.     Solidus       \/
+        %%   Colon         \:     Semicolon     \;     Less than     \<
+        %%   Equals        \=     Greater than  \>     Question mark \?
+        %%   Commercial at \@     Left bracket  \[     Backslash     \\
+        %%   Right bracket \]     Circumflex    \^     Underscore    \_
+        %%   Grave accent  \`     Left brace    \{     Vertical bar  \|
+        %%   Right brace   \}     Tilde         \~}
+        %%
+        \endinput
+        %%
+        %% End of file `dehyphn.tex'.
+        -->
+    </patterns>
+</hyphenation-info>

+ 58 - 1
test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.json

@@ -21,7 +21,52 @@
                 "dict_dec":{
                     "type":"dictionary_decompounder",
                     "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"]
-                }
+                },
+              "hyphenation_dec_only_longest_match": {
+                "type": "hyphenation_decompounder",
+                "hyphenation_patterns_path": "de_DR.xml",
+                "word_list": [
+                  "fuss",
+                  "fussball",
+                  "ballpumpe",
+                  "ball",
+                  "pumpe",
+                  "kaffee",
+                  "fee",
+                  "maschine"
+                ],
+                "only_longest_match": true
+              },
+              "hyphenation_dec_no_sub_matches": {
+                "type": "hyphenation_decompounder",
+                "hyphenation_patterns_path": "de_DR.xml",
+                "word_list": [
+                  "fuss",
+                  "fussball",
+                  "ballpumpe",
+                  "ball",
+                  "pumpe",
+                  "kaffee",
+                  "fee",
+                  "maschine"
+                ],
+                "no_sub_matches": true
+              },
+              "hyphenation_dec_no_overlapping_matches": {
+                "type": "hyphenation_decompounder",
+                "hyphenation_patterns_path": "de_DR.xml",
+                "word_list": [
+                  "fuss",
+                  "fussball",
+                  "ballpumpe",
+                  "ball",
+                  "pumpe",
+                  "kaffee",
+                  "fee",
+                  "maschine"
+                ],
+                "no_overlapping_matches": true
+              }
             },
             "analyzer":{
                 "standard":{
@@ -47,6 +92,18 @@
                 "decompoundingAnalyzer":{
                     "tokenizer":"standard",
                     "filter":["dict_dec"]
+                },
+                "hyphenationDecompoundingAnalyzerOnlyLongestMatch":{
+                    "tokenizer":"standard",
+                    "filter":["hyphenation_dec_only_longest_match"]
+                },
+                "hyphenationDecompoundingAnalyzerNoSubMatches": {
+                    "tokenizer":"standard",
+                    "filter":["hyphenation_dec_no_sub_matches"]
+                },
+                "hyphenationDecompoundingAnalyzerNoOverlappingMatches":{
+                    "tokenizer":"standard",
+                    "filter":["hyphenation_dec_no_overlapping_matches"]
                 }
             }
         }

+ 25 - 0
test/framework/src/main/resources/org/elasticsearch/analysis/common/test1.yml

@@ -15,6 +15,21 @@ index :
       dict_dec :
         type : dictionary_decompounder
         word_list : [donau, dampf, schiff, spargel, creme, suppe]
+      hyphenation_dec_only_longest_match :
+        type : hyphenation_decompounder
+        hyphenation_patterns_path : de_DR.xml
+        word_list : [fuss, fussball, ballpumpe, ball, pumpe, kaffee, fee, maschine]
+        only_longest_match : true
+      hyphenation_dec_no_sub_matches :
+        type : hyphenation_decompounder
+        hyphenation_patterns_path : de_DR.xml
+        word_list : [fuss, fussball, ballpumpe, ball, pumpe, kaffee, fee, maschine]
+        no_sub_matches : true
+      hyphenation_dec_no_overlapping_matches :
+        type : hyphenation_decompounder
+        hyphenation_patterns_path : de_DR.xml
+        word_list : [fuss, fussball, ballpumpe, ball, pumpe, kaffee, fee, maschine]
+        no_overlapping_matches: true
     analyzer :
       standard :
         type : standard
@@ -37,3 +52,13 @@ index :
       decompoundingAnalyzer :
         tokenizer : standard
         filter : [dict_dec]
+      hyphenationDecompoundingAnalyzerOnlyLongestMatch :
+        tokenizer : standard
+        filter : [hyphenation_dec_only_longest_match]
+      hyphenationDecompoundingAnalyzerNoSubMatches:
+        tokenizer: standard
+        filter : [hyphenation_dec_no_sub_matches]
+      hyphenationDecompoundingAnalyzerNoOverlappingMatches:
+        tokenizer: standard
+        filter : [hyphenation_dec_no_overlapping_matches]
+