|
|
@@ -111,5 +111,107 @@
|
|
|
- match: { tokens.0.position: 0 }
|
|
|
- match: { tokens.1.token: "line" }
|
|
|
- match: { tokens.1.start_offset: 10 }
|
|
|
- - match: { tokens.1.end_offset: 14 }
|
|
|
+ - match: { tokens.1.end_offset: 26 }
|
|
|
- match: { tokens.1.position: 1 }
|
|
|
+
|
|
|
+---
|
|
|
+"Test 7.14 analyzer with multiple multiline messages":
|
|
|
+ - do:
|
|
|
+ indices.analyze:
|
|
|
+ body: >
|
|
|
+ {
|
|
|
+ "char_filter" : [
|
|
|
+ "first_non_blank_line"
|
|
|
+ ],
|
|
|
+ "tokenizer" : "ml_standard",
|
|
|
+ "filter" : [
|
|
|
+ { "type" : "stop", "stopwords": [
|
|
|
+ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
|
|
|
+ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
|
|
|
+ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
|
|
|
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
|
|
+ "GMT", "UTC"
|
|
|
+ ] }
|
|
|
+ ],
|
|
|
+ "text" : [
|
|
|
+ "   \nfirst line\nsecond line",
|
|
|
+ "   \nfirst line of second message\nsecond line of second message"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ - match: { tokens.0.token: "first" }
|
|
|
+ - match: { tokens.0.start_offset: 4 }
|
|
|
+ - match: { tokens.0.end_offset: 9 }
|
|
|
+ - match: { tokens.0.position: 0 }
|
|
|
+ - match: { tokens.1.token: "line" }
|
|
|
+ - match: { tokens.1.start_offset: 10 }
|
|
|
+ - match: { tokens.1.end_offset: 26 }
|
|
|
+ - match: { tokens.1.position: 1 }
|
|
|
+ - match: { tokens.2.token: "first" }
|
|
|
+ - match: { tokens.2.start_offset: 31 }
|
|
|
+ - match: { tokens.2.end_offset: 36 }
|
|
|
+ - match: { tokens.2.position: 102 }
|
|
|
+ - match: { tokens.3.token: "line" }
|
|
|
+ - match: { tokens.3.start_offset: 37 }
|
|
|
+ - match: { tokens.3.end_offset: 41 }
|
|
|
+ - match: { tokens.3.position: 103 }
|
|
|
+ - match: { tokens.4.token: "of" }
|
|
|
+ - match: { tokens.4.start_offset: 42 }
|
|
|
+ - match: { tokens.4.end_offset: 44 }
|
|
|
+ - match: { tokens.4.position: 104 }
|
|
|
+ - match: { tokens.5.token: "second" }
|
|
|
+ - match: { tokens.5.start_offset: 45 }
|
|
|
+ - match: { tokens.5.end_offset: 51 }
|
|
|
+ - match: { tokens.5.position: 105 }
|
|
|
+ - match: { tokens.6.token: "message" }
|
|
|
+ - match: { tokens.6.start_offset: 52 }
|
|
|
+ - match: { tokens.6.end_offset: 89 }
|
|
|
+ - match: { tokens.6.position: 106 }
|
|
|
+
|
|
|
+---
|
|
|
+"Test 7.14 analyzer with stop words in messages":
|
|
|
+ - do:
|
|
|
+ indices.analyze:
|
|
|
+ body: >
|
|
|
+ {
|
|
|
+ "char_filter" : [
|
|
|
+ "first_non_blank_line"
|
|
|
+ ],
|
|
|
+ "tokenizer" : "ml_standard",
|
|
|
+ "filter" : [
|
|
|
+ { "type" : "stop", "stopwords": [
|
|
|
+ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
|
|
|
+ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
|
|
|
+ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
|
|
|
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
|
|
|
+ "GMT", "UTC"
|
|
|
+ ] }
|
|
|
+ ],
|
|
|
+ "text" : [
|
|
|
+ "May 27, 2021 @ 19:51:15.288 UTC log message one",
|
|
|
+ "May 27, 2021 @ 19:52:25.288 UTC log message two"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ - match: { tokens.0.token: "log" }
|
|
|
+ - match: { tokens.0.start_offset: 32 }
|
|
|
+ - match: { tokens.0.end_offset: 35 }
|
|
|
+ - match: { tokens.0.position: 7 }
|
|
|
+ - match: { tokens.1.token: "message" }
|
|
|
+ - match: { tokens.1.start_offset: 36 }
|
|
|
+ - match: { tokens.1.end_offset: 43 }
|
|
|
+ - match: { tokens.1.position: 8 }
|
|
|
+ - match: { tokens.2.token: "one" }
|
|
|
+ - match: { tokens.2.start_offset: 44 }
|
|
|
+ - match: { tokens.2.end_offset: 47 }
|
|
|
+ - match: { tokens.2.position: 9 }
|
|
|
+ - match: { tokens.3.token: "log" }
|
|
|
+ - match: { tokens.3.start_offset: 80 }
|
|
|
+ - match: { tokens.3.end_offset: 83 }
|
|
|
+ - match: { tokens.3.position: 117 }
|
|
|
+ - match: { tokens.4.token: "message" }
|
|
|
+ - match: { tokens.4.start_offset: 84 }
|
|
|
+ - match: { tokens.4.end_offset: 91 }
|
|
|
+ - match: { tokens.4.position: 118 }
|
|
|
+ - match: { tokens.5.token: "two" }
|
|
|
+ - match: { tokens.5.start_offset: 92 }
|
|
|
+ - match: { tokens.5.end_offset: 95 }
|
|
|
+ - match: { tokens.5.position: 119 }
|