|  | @@ -146,13 +146,20 @@ public final class UnigramTokenizer extends Tokenizer {
 | 
	
		
			
				|  |  |          offsetAtt.setOffset(correctOffset(whitespaceTokenizer.finalOffset), correctOffset(whitespaceTokenizer.finalOffset));
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    @Override
 | 
	
		
			
				|  |  | -    public boolean incrementToken() throws IOException {
 | 
	
		
			
				|  |  | -        clearAttributes();
 | 
	
		
			
				|  |  | +    private void popFromTokens() {
 | 
	
		
			
				|  |  |          if (tokens.isEmpty() == false) {
 | 
	
		
			
				|  |  |              DelimitedToken.Encoded token = tokens.removeFirst();
 | 
	
		
			
				|  |  | +            tokenizedValues.add(token);
 | 
	
		
			
				|  |  |              termAtt.setEmpty().append(token.charSequence());
 | 
	
		
			
				|  |  |              offsetAtt.setOffset(token.startOffset(), token.endOffset());
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    @Override
 | 
	
		
			
				|  |  | +    public boolean incrementToken() throws IOException {
 | 
	
		
			
				|  |  | +        clearAttributes();
 | 
	
		
			
				|  |  | +        if (tokens.isEmpty() == false) {
 | 
	
		
			
				|  |  | +            popFromTokens();
 | 
	
		
			
				|  |  |              return true;
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |          // First, whitespace tokenize
 | 
	
	
		
			
				|  | @@ -160,7 +167,7 @@ public final class UnigramTokenizer extends Tokenizer {
 | 
	
		
			
				|  |  |          if (whitespaceToken != null) {
 | 
	
		
			
				|  |  |              if (neverSplitHash.contains(whitespaceToken.charSequence())) {
 | 
	
		
			
				|  |  |                  Integer maybeTokenized = vocabToId.get(new BytesRef(whitespaceToken.charSequence()));
 | 
	
		
			
				|  |  | -                tokenizedValues.add(
 | 
	
		
			
				|  |  | +                tokens.add(
 | 
	
		
			
				|  |  |                      new DelimitedToken.Encoded(
 | 
	
		
			
				|  |  |                          whitespaceToken.charSequence().toString(),
 | 
	
		
			
				|  |  |                          Objects.requireNonNullElse(maybeTokenized, unknownTokenId),
 | 
	
	
		
			
				|  | @@ -168,7 +175,7 @@ public final class UnigramTokenizer extends Tokenizer {
 | 
	
		
			
				|  |  |                          correctOffset(whitespaceToken.endOffset())
 | 
	
		
			
				|  |  |                      )
 | 
	
		
			
				|  |  |                  );
 | 
	
		
			
				|  |  | -                offsetAtt.setOffset(correctOffset(whitespaceToken.startOffset()), correctOffset(whitespaceToken.endOffset()));
 | 
	
		
			
				|  |  | +                popFromTokens();
 | 
	
		
			
				|  |  |                  return true;
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |              int inputOffsetStart = whitespaceToken.startOffset();
 | 
	
	
		
			
				|  | @@ -217,12 +224,9 @@ public final class UnigramTokenizer extends Tokenizer {
 | 
	
		
			
				|  |  |                      MultiCharSequence.from(PREFIX, token.charSequence()),
 | 
	
		
			
				|  |  |                      offsetCorrectorFunction
 | 
	
		
			
				|  |  |                  );
 | 
	
		
			
				|  |  | -                tokenizedValues.addAll(tokenList);
 | 
	
		
			
				|  |  |                  tokens.addAll(tokenList);
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  | -            DelimitedToken.Encoded token = tokens.removeFirst();
 | 
	
		
			
				|  |  | -            termAtt.setEmpty().append(token.charSequence());
 | 
	
		
			
				|  |  | -            offsetAtt.setOffset(token.startOffset(), token.endOffset());
 | 
	
		
			
				|  |  | +            popFromTokens();
 | 
	
		
			
				|  |  |              return true;
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |          return false;
 |