|
@@ -9,20 +9,6 @@ Input text is lowercased, normalized to remove extended characters, sorted,
|
|
|
deduplicated and concatenated into a single token. If a stopword list is
|
|
|
configured, stop words will also be removed.
|
|
|
|
|
|
-[float]
|
|
|
-=== Definition
|
|
|
-
|
|
|
-It consists of:
|
|
|
-
|
|
|
-Tokenizer::
|
|
|
-* <<analysis-standard-tokenizer,Standard Tokenizer>>
|
|
|
-
|
|
|
-Token Filters (in order)::
|
|
|
-1. <<analysis-lowercase-tokenfilter,Lower Case Token Filter>>
|
|
|
-2. <<analysis-asciifolding-tokenfilter>>
|
|
|
-3. <<analysis-stop-tokenfilter,Stop Token Filter>> (disabled by default)
|
|
|
-4. <<analysis-fingerprint-tokenfilter>>
|
|
|
-
|
|
|
[float]
|
|
|
=== Example output
|
|
|
|
|
@@ -149,3 +135,46 @@ The above example produces the following term:
|
|
|
---------------------------
|
|
|
[ consistent godel said sentence yes ]
|
|
|
---------------------------
|
|
|
+
|
|
|
+[float]
|
|
|
+=== Definition
|
|
|
+
|
|
|
+The `fingerprint` analyzer consists of:
|
|
|
+
|
|
|
+Tokenizer::
|
|
|
+* <<analysis-standard-tokenizer,Standard Tokenizer>>
|
|
|
+
|
|
|
+Token Filters (in order)::
|
|
|
+1. <<analysis-lowercase-tokenfilter,Lower Case Token Filter>>
|
|
|
+2. <<analysis-asciifolding-tokenfilter>>
|
|
|
+3. <<analysis-stop-tokenfilter,Stop Token Filter>> (disabled by default)
|
|
|
+4. <<analysis-fingerprint-tokenfilter>>
|
|
|
+
|
|
|
+If you need to customize the `fingerprint` analyzer beyond the configuration
|
|
|
+parameters, then you need to recreate it as a `custom` analyzer and modify
|
|
|
+it, usually by adding token filters. The following request recreates the built-in
|
|
|
+`fingerprint` analyzer and you can use it as a starting point for further
|
|
|
+customization:
|
|
|
+
|
|
|
+[source,js]
|
|
|
+----------------------------------------------------
|
|
|
+PUT /fingerprint_example
|
|
|
+{
|
|
|
+ "settings": {
|
|
|
+ "analysis": {
|
|
|
+ "analyzer": {
|
|
|
+ "rebuilt_fingerprint": {
|
|
|
+ "tokenizer": "standard",
|
|
|
+ "filter": [
|
|
|
+ "lowercase",
|
|
|
+ "asciifolding",
|
|
|
+ "fingerprint"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+----------------------------------------------------
|
|
|
+// CONSOLE
|
|
|
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: fingerprint_example, first: fingerprint, second: rebuilt_fingerprint}\nendyaml\n/]
|