package org.bitbucket.eunjeon.seunjeon.elasticsearch;

import java.io.IOException;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Queue;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.elasticsearch.common.logging.ESLoggerFactory;

/* loaded from: input_file:org/bitbucket/eunjeon/seunjeon/elasticsearch/SeunjeonTokenizer.class */
public class SeunjeonTokenizer extends Tokenizer {
    private CharTermAttribute charTermAtt;
    private PositionIncrementAttribute posIncrAtt;
    private PositionLengthAttribute posLenAtt;
    private OffsetAttribute offsetAtt;
    private TypeAttribute typeAtt;
    private Queue<LuceneToken> tokensQueue;
    private TokenBuilder tokenBuilder;
    Logger logger;

    public SeunjeonTokenizer(TokenizerOptions tokenizerOptions) {
        super(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
        this.logger = null;
        this.logger = ESLoggerFactory.getLogger(tokenizerOptions.getName());
        initAttribute();
        TokenBuilder.setMaxUnkLength(tokenizerOptions.getMaxUnkLength());
        if (tokenizerOptions.getUserDictPath() != null) {
            TokenBuilder.setUserDict(tokenizerOptions.getUserDictPath());
            this.logger.info(tokenizerOptions.getUserDictPath() + " loading was successful.");
            if (tokenizerOptions.getUserWords().length > 0) {
                this.logger.warn("ignored \"user_words\". because settings of \"user_dict_path\"");
            }
        } else {
            TokenBuilder.setUserDict((Iterator<String>) Arrays.asList(tokenizerOptions.getUserWords()).iterator());
        }
        this.tokenBuilder = new TokenBuilder(tokenizerOptions.getDeCompound(), tokenizerOptions.getDeInflect(), tokenizerOptions.getIndexEojeol(), tokenizerOptions.getPosTagging(), TokenBuilder.convertPos(tokenizerOptions.getIndexPoses()));
    }

    private void initAttribute() {
        this.charTermAtt = addAttribute(CharTermAttribute.class);
        this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        this.posLenAtt = addAttribute(PositionLengthAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.typeAtt = addAttribute(TypeAttribute.class);
    }

    public void reset() throws IOException {
        super.reset();
        this.tokensQueue = new LinkedList(this.tokenBuilder.tokenize(getDocument()));
    }

    public final boolean incrementToken() throws IOException {
        if (this.tokensQueue.isEmpty()) {
            return false;
        }
        LuceneToken poll = this.tokensQueue.poll();
        this.posIncrAtt.setPositionIncrement(poll.positionIncr());
        this.posLenAtt.setPositionLength(poll.positionLength());
        this.offsetAtt.setOffset(correctOffset(poll.startOffset()), correctOffset(poll.endOffset()));
        String charTerm = poll.charTerm();
        this.charTermAtt.copyBuffer(charTerm.toCharArray(), 0, charTerm.length());
        this.typeAtt.setType(poll.poses());
        return true;
    }

    private String getDocument() throws IOException {
        StringWriter stringWriter = new StringWriter();
        char[] cArr = new char[4096];
        while (true) {
            int read = this.input.read(cArr);
            if (-1 == read) {
                return stringWriter.toString().toLowerCase();
            }
            stringWriter.write(cArr, 0, read);
        }
    }
}
