|
|
@@ -100,6 +100,50 @@ public class NerProcessorTests extends ESTestCase {
|
|
|
assertThat(e, instanceOf(ElasticsearchStatusException.class));
|
|
|
}
|
|
|
|
|
|
+ public void testProcessResultsWithSpecialTokens() {
|
|
|
+ NerProcessor.NerResultProcessor processor = new NerProcessor.NerResultProcessor(NerProcessor.IobTag.values(), null, true);
|
|
|
+ BertTokenizer tokenizer = BertTokenizer.builder(
|
|
|
+ List.of(
|
|
|
+ "el",
|
|
|
+ "##astic",
|
|
|
+ "##search",
|
|
|
+ "many",
|
|
|
+ "use",
|
|
|
+ "in",
|
|
|
+ "london",
|
|
|
+ BertTokenizer.PAD_TOKEN,
|
|
|
+ BertTokenizer.UNKNOWN_TOKEN,
|
|
|
+ BertTokenizer.SEPARATOR_TOKEN,
|
|
|
+ BertTokenizer.CLASS_TOKEN
|
|
|
+ ),
|
|
|
+ new BertTokenization(true, true, null, Tokenization.Truncate.NONE)
|
|
|
+ ).build();
|
|
|
+ TokenizationResult tokenization = tokenizer.buildTokenizationResult(
|
|
|
+ List.of(tokenizer.tokenize("Many use Elasticsearch in London", Tokenization.Truncate.NONE))
|
|
|
+ );
|
|
|
+
|
|
|
+ double[][][] scores = {
|
|
|
+ {
|
|
|
+ { 7, 0, 0, 0, 0, 0, 0, 0, 0 }, // cls
|
|
|
+ { 7, 0, 0, 0, 0, 0, 0, 0, 0 }, // many
|
|
|
+ { 7, 0, 0, 0, 0, 0, 0, 0, 0 }, // use
|
|
|
+ { 0.01, 0.01, 0, 0.01, 0, 7, 0, 3, 0 }, // el
|
|
|
+ { 0.01, 0.01, 0, 0, 0, 0, 0, 0, 0 }, // ##astic
|
|
|
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ##search
|
|
|
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // in
|
|
|
+ { 0, 0, 0, 0, 0, 0, 0, 6, 0 }, // london
|
|
|
+ { 7, 0, 0, 0, 0, 0, 0, 0, 0 } // sep
|
|
|
+ } };
|
|
|
+ NerResults result = (NerResults) processor.processResult(tokenization, new PyTorchInferenceResult("1", scores, 1L, null));
|
|
|
+
|
|
|
+ assertThat(result.getAnnotatedResult(), equalTo("Many use [Elasticsearch](ORG&Elasticsearch) in [London](LOC&London)"));
|
|
|
+ assertThat(result.getEntityGroups().size(), equalTo(2));
|
|
|
+ assertThat(result.getEntityGroups().get(0).getEntity(), equalTo("elasticsearch"));
|
|
|
+ assertThat(result.getEntityGroups().get(0).getClassName(), equalTo(NerProcessor.Entity.ORG.toString()));
|
|
|
+ assertThat(result.getEntityGroups().get(1).getEntity(), equalTo("london"));
|
|
|
+ assertThat(result.getEntityGroups().get(1).getClassName(), equalTo(NerProcessor.Entity.LOC.toString()));
|
|
|
+ }
|
|
|
+
|
|
|
public void testProcessResults() {
|
|
|
NerProcessor.NerResultProcessor processor = new NerProcessor.NerResultProcessor(NerProcessor.IobTag.values(), null, true);
|
|
|
TokenizationResult tokenization = tokenize(
|