package org.apache.ctakes.core.cleartk;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.utils.distsem.WordEmbeddings;
import org.apache.ctakes.utils.distsem.WordVector;
import org.apache.ctakes.utils.distsem.WordVectorReader;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;

/* loaded from: input_file:org/apache/ctakes/core/cleartk/ContinuousTextExtractor.class */
public class ContinuousTextExtractor implements NamedFeatureExtractor1<BaseToken> {
    private int dims;
    private WordEmbeddings words;
    private OovStrategy oovStrategy;

    /* loaded from: input_file:org/apache/ctakes/core/cleartk/ContinuousTextExtractor$OovStrategy.class */
    public enum OovStrategy {
        OOV_FEATURE,
        EMPTY_VECTOR,
        MEAN_VECTOR
    }

    public ContinuousTextExtractor(String str) throws CleartkExtractorException {
        this(str, OovStrategy.OOV_FEATURE);
    }

    public ContinuousTextExtractor(String str, OovStrategy oovStrategy) throws CleartkExtractorException {
        this.words = null;
        this.oovStrategy = null;
        try {
            this.words = WordVectorReader.getEmbeddings(FileLocator.getAsStream(str));
            this.oovStrategy = oovStrategy;
        } catch (IOException e) {
            e.printStackTrace();
            throw new CleartkExtractorException(e);
        }
    }

    public List<Feature> extract(JCas jCas, BaseToken baseToken) throws CleartkExtractorException {
        ArrayList arrayList = new ArrayList();
        String coveredText = baseToken.getCoveredText();
        WordVector wordVector = null;
        if (this.words.containsKey(coveredText)) {
            wordVector = this.words.getVector(coveredText);
        } else if (this.words.containsKey(coveredText.toLowerCase())) {
            wordVector = this.words.getVector(coveredText.toLowerCase());
        } else {
            if (this.oovStrategy == OovStrategy.OOV_FEATURE) {
                arrayList.add(new Feature(getFeatureName(), "OOV"));
                return arrayList;
            }
            if (this.oovStrategy == OovStrategy.EMPTY_VECTOR) {
                wordVector = new WordVector("_empty_", new double[this.words.getDimensionality()]);
            } else if (this.oovStrategy == OovStrategy.MEAN_VECTOR) {
                wordVector = this.words.getMeanVector();
            }
        }
        for (int i = 0; i < wordVector.size(); i++) {
            arrayList.add(new Feature(getFeatureName() + "_" + i, Double.valueOf(wordVector.getValue(i))));
        }
        return arrayList;
    }

    public int getEmbeddingsDimensionality() {
        return this.words.getDimensionality();
    }

    public String getFeatureName() {
        return "ContinuousText";
    }
}
