|
@@ -0,0 +1,73 @@
|
|
|
+/*
|
|
|
+ * Licensed to Elasticsearch under one or more contributor
|
|
|
+ * license agreements. See the NOTICE file distributed with
|
|
|
+ * this work for additional information regarding copyright
|
|
|
+ * ownership. Elasticsearch licenses this file to you under
|
|
|
+ * the Apache License, Version 2.0 (the "License"); you may
|
|
|
+ * not use this file except in compliance with the License.
|
|
|
+ * You may obtain a copy of the License at
|
|
|
+ *
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+ *
|
|
|
+ * Unless required by applicable law or agreed to in writing,
|
|
|
+ * software distributed under the License is distributed on an
|
|
|
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
+ * KIND, either express or implied. See the License for the
|
|
|
+ * specific language governing permissions and limitations
|
|
|
+ * under the License.
|
|
|
+ */
|
|
|
+
|
|
|
+package org.elasticsearch.index.analysis;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+
|
|
|
+import org.apache.lucene.analysis.CannedTokenStream;
|
|
|
+import org.apache.lucene.analysis.Token;
|
|
|
+import org.apache.lucene.analysis.TokenStream;
|
|
|
+import org.elasticsearch.common.settings.Settings;
|
|
|
+import org.elasticsearch.index.Index;
|
|
|
+import org.elasticsearch.index.IndexSettings;
|
|
|
+import org.elasticsearch.test.ESTokenStreamTestCase;
|
|
|
+import org.elasticsearch.test.IndexSettingsModule;
|
|
|
+
|
|
|
+public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|
|
+
|
|
|
+ public void testBasic() throws IOException {
|
|
|
+
|
|
|
+ Index index = new Index("test", "_na_");
|
|
|
+ String name = "ngr";
|
|
|
+ Settings indexSettings = newAnalysisSettingsBuilder().build();
|
|
|
+ IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
|
|
|
+ Settings settings = newAnalysisSettingsBuilder().build();
|
|
|
+
|
|
|
+ // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input:
|
|
|
+ TokenStream in = new CannedTokenStream(0, 12, new Token[] {
|
|
|
+ token("wtf", 1, 5, 0, 3),
|
|
|
+ token("what", 0, 1, 0, 3),
|
|
|
+ token("wow", 0, 3, 0, 3),
|
|
|
+ token("the", 1, 1, 0, 3),
|
|
|
+ token("fudge", 1, 3, 0, 3),
|
|
|
+ token("that's", 1, 1, 0, 3),
|
|
|
+ token("funny", 1, 1, 0, 3),
|
|
|
+ token("happened", 1, 1, 4, 12)
|
|
|
+ });
|
|
|
+
|
|
|
+ TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in);
|
|
|
+
|
|
|
+ // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
|
|
|
+ assertTokenStreamContents(tokens,
|
|
|
+ new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
|
|
|
+ new int[] {0, 0, 0, 0, 0, 0, 0, 4},
|
|
|
+ new int[] {3, 3, 3, 3, 3, 3, 3, 12},
|
|
|
+ new int[] {1, 0, 0, 1, 0, 1, 0, 1},
|
|
|
+ new int[] {3, 1, 1, 1, 1, 1, 1, 1},
|
|
|
+ 12);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {
|
|
|
+ final Token t = new Token(term, startOffset, endOffset);
|
|
|
+ t.setPositionIncrement(posInc);
|
|
|
+ t.setPositionLength(posLength);
|
|
|
+ return t;
|
|
|
+ }
|
|
|
+}
|