|
@@ -1,9 +1,147 @@
|
|
|
[[analysis-classic-tokenfilter]]
|
|
|
-=== Classic Token Filter
|
|
|
+=== Classic token filter
|
|
|
+++++
|
|
|
+<titleabbrev>Classic</titleabbrev>
|
|
|
+++++
|
|
|
|
|
|
-The `classic` token filter does optional post-processing of
|
|
|
-terms that are generated by the <<analysis-classic-tokenizer,`classic` tokenizer>>.
|
|
|
+Performs optional post-processing of terms generated by the
|
|
|
+<<analysis-classic-tokenizer,`classic` tokenizer>>.
|
|
|
|
|
|
-This filter removes the english possessive from the end of words, and
|
|
|
-it removes dots from acronyms.
|
|
|
+This filter removes the english possessive (`'s`) from the end of words and
|
|
|
+removes dots from acronyms. It uses Lucene's
|
|
|
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/standard/ClassicFilter.html[ClassicFilter].
|
|
|
+
|
|
|
+[[analysis-classic-tokenfilter-analyze-ex]]
|
|
|
+==== Example
|
|
|
+
|
|
|
+The following <<indices-analyze,analyze API>> request demonstrates how the
|
|
|
+classic token filter works.
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+GET /_analyze
|
|
|
+{
|
|
|
+ "tokenizer" : "classic",
|
|
|
+ "filter" : ["classic"],
|
|
|
+ "text" : "The 2 Q.U.I.C.K. Brown-Foxes jumped over the lazy dog's bone."
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+The filter produces the following tokens:
|
|
|
+
|
|
|
+[source,text]
|
|
|
+--------------------------------------------------
|
|
|
+[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog, bone ]
|
|
|
+--------------------------------------------------
|
|
|
+
|
|
|
+/////////////////////
|
|
|
+[source,console-result]
|
|
|
+--------------------------------------------------
|
|
|
+{
|
|
|
+ "tokens" : [
|
|
|
+ {
|
|
|
+ "token" : "The",
|
|
|
+ "start_offset" : 0,
|
|
|
+ "end_offset" : 3,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 0
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "2",
|
|
|
+ "start_offset" : 4,
|
|
|
+ "end_offset" : 5,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 1
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "QUICK",
|
|
|
+ "start_offset" : 6,
|
|
|
+ "end_offset" : 16,
|
|
|
+ "type" : "<ACRONYM>",
|
|
|
+ "position" : 2
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "Brown",
|
|
|
+ "start_offset" : 17,
|
|
|
+ "end_offset" : 22,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 3
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "Foxes",
|
|
|
+ "start_offset" : 23,
|
|
|
+ "end_offset" : 28,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 4
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "jumped",
|
|
|
+ "start_offset" : 29,
|
|
|
+ "end_offset" : 35,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 5
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "over",
|
|
|
+ "start_offset" : 36,
|
|
|
+ "end_offset" : 40,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 6
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "the",
|
|
|
+ "start_offset" : 41,
|
|
|
+ "end_offset" : 44,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 7
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "lazy",
|
|
|
+ "start_offset" : 45,
|
|
|
+ "end_offset" : 49,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 8
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "dog",
|
|
|
+ "start_offset" : 50,
|
|
|
+ "end_offset" : 55,
|
|
|
+ "type" : "<APOSTROPHE>",
|
|
|
+ "position" : 9
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "token" : "bone",
|
|
|
+ "start_offset" : 56,
|
|
|
+ "end_offset" : 60,
|
|
|
+ "type" : "<ALPHANUM>",
|
|
|
+ "position" : 10
|
|
|
+ }
|
|
|
+ ]
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
+/////////////////////
|
|
|
+
|
|
|
+[[analysis-classic-tokenfilter-analyzer-ex]]
|
|
|
+==== Add to an analyzer
|
|
|
+
|
|
|
+The following <<indices-create-index,create index API>> request uses the
|
|
|
+classic token filter to configure a new
|
|
|
+<<analysis-custom-analyzer,custom analyzer>>.
|
|
|
+
|
|
|
+[source,console]
|
|
|
+--------------------------------------------------
|
|
|
+PUT /classic_example
|
|
|
+{
|
|
|
+ "settings" : {
|
|
|
+ "analysis" : {
|
|
|
+ "analyzer" : {
|
|
|
+ "classic_analyzer" : {
|
|
|
+ "tokenizer" : "classic",
|
|
|
+ "filter" : ["classic"]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+--------------------------------------------------
|
|
|
|