|
@@ -1,129 +0,0 @@
|
|
|
-/*
|
|
|
- * Licensed to Elasticsearch under one or more contributor
|
|
|
- * license agreements. See the NOTICE file distributed with
|
|
|
- * this work for additional information regarding copyright
|
|
|
- * ownership. Elasticsearch licenses this file to you under
|
|
|
- * the Apache License, Version 2.0 (the "License"); you may
|
|
|
- * not use this file except in compliance with the License.
|
|
|
- * You may obtain a copy of the License at
|
|
|
- *
|
|
|
- * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
- *
|
|
|
- * Unless required by applicable law or agreed to in writing,
|
|
|
- * software distributed under the License is distributed on an
|
|
|
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
- * KIND, either express or implied. See the License for the
|
|
|
- * specific language governing permissions and limitations
|
|
|
- * under the License.
|
|
|
- */
|
|
|
-
|
|
|
-package org.apache.lucene.analysis;
|
|
|
-
|
|
|
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
|
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|
|
-
|
|
|
-import java.io.IOException;
|
|
|
-import java.util.Collections;
|
|
|
-import java.util.Iterator;
|
|
|
-
|
|
|
-/**
|
|
|
- * This {@link Analyzer} wraps another analyzer and adds a set of prefixes to the
|
|
|
- * underlying TokenStream. While these prefixes are iterated the position attribute
|
|
|
- * will not be incremented. Also each prefix will be separated from the other tokens
|
|
|
- * by a separator character.
|
|
|
- * NOTE: The sequence of prefixes needs to be not empty
|
|
|
- */
|
|
|
-public class PrefixAnalyzer extends Analyzer {
|
|
|
-
|
|
|
- private final char separator;
|
|
|
- private final Iterable<? extends CharSequence> prefix;
|
|
|
- private final Analyzer analyzer;
|
|
|
-
|
|
|
- /**
|
|
|
- * Create a new {@link PrefixAnalyzer}. The separator will be set to the DEFAULT_SEPARATOR.
|
|
|
- *
|
|
|
- * @param analyzer {@link Analyzer} to wrap
|
|
|
- * @param prefix Single prefix
|
|
|
- */
|
|
|
- public PrefixAnalyzer(Analyzer analyzer, char separator, CharSequence prefix) {
|
|
|
- this(analyzer, separator, Collections.singleton(prefix));
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Create a new {@link PrefixAnalyzer}. The separator will be set to the DEFAULT_SEPARATOR.
|
|
|
- *
|
|
|
- * @param analyzer {@link Analyzer} to wrap
|
|
|
- * @param prefix {@link Iterable} of {@link CharSequence} which keeps all prefixes
|
|
|
- */
|
|
|
- public PrefixAnalyzer(Analyzer analyzer, char separator, Iterable<? extends CharSequence> prefix) {
|
|
|
- super();
|
|
|
- this.analyzer = analyzer;
|
|
|
- this.prefix = prefix;
|
|
|
- this.separator = separator;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- protected TokenStreamComponents createComponents(String fieldName) {
|
|
|
- TokenStreamComponents createComponents = analyzer.createComponents(fieldName);
|
|
|
- TokenStream stream = new PrefixTokenFilter(createComponents.getTokenStream(), separator, prefix);
|
|
|
- TokenStreamComponents tsc = new TokenStreamComponents(createComponents.getTokenizer(), stream);
|
|
|
- return tsc;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * The {@link PrefixTokenFilter} wraps a {@link TokenStream} and adds a set
|
|
|
- * prefixes ahead. The position attribute will not be incremented for the prefixes.
|
|
|
- */
|
|
|
- public static final class PrefixTokenFilter extends TokenFilter {
|
|
|
-
|
|
|
- private final char separator;
|
|
|
- private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
|
|
|
- private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
|
|
|
- private final Iterable<? extends CharSequence> prefixes;
|
|
|
-
|
|
|
- private Iterator<? extends CharSequence> currentPrefix;
|
|
|
-
|
|
|
- /**
|
|
|
- * Create a new {@link PrefixTokenFilter}. The separator will be set to the DEFAULT_SEPARATOR.
|
|
|
- *
|
|
|
- * @param input {@link TokenStream} to wrap
|
|
|
- * @param separator Character used separate prefixes from other tokens
|
|
|
- * @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes
|
|
|
- */
|
|
|
- public PrefixTokenFilter(TokenStream input, char separator, Iterable<? extends CharSequence> prefixes) {
|
|
|
- super(input);
|
|
|
- this.prefixes = prefixes;
|
|
|
- this.currentPrefix = null;
|
|
|
- this.separator = separator;
|
|
|
- if (prefixes == null || !prefixes.iterator().hasNext()) {
|
|
|
- throw new IllegalArgumentException("one or more prefixes needed");
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public boolean incrementToken() throws IOException {
|
|
|
- if (currentPrefix != null) {
|
|
|
- if (!currentPrefix.hasNext()) {
|
|
|
- return input.incrementToken();
|
|
|
- } else {
|
|
|
- posAttr.setPositionIncrement(0);
|
|
|
- }
|
|
|
- } else {
|
|
|
- currentPrefix = prefixes.iterator();
|
|
|
- termAttr.setEmpty();
|
|
|
- posAttr.setPositionIncrement(1);
|
|
|
- assert (currentPrefix.hasNext()) : "one or more prefixes needed";
|
|
|
- }
|
|
|
- termAttr.setEmpty();
|
|
|
- termAttr.append(currentPrefix.next());
|
|
|
- termAttr.append(separator);
|
|
|
- return true;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void reset() throws IOException {
|
|
|
- super.reset();
|
|
|
- currentPrefix = null;
|
|
|
- }
|
|
|
- }
|
|
|
-}
|