package org.apache.ctakes.core.ae;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import org.apache.ctakes.core.pipeline.GenerateSentenceBIODescriptors;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.utils.struct.CounterMap;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.CleartkAnnotator;
import org.cleartk.ml.DataWriter;
import org.cleartk.ml.Feature;
import org.cleartk.ml.Instance;
import org.cleartk.ml.feature.function.CharacterCategoryPatternFunction;
import org.cleartk.util.ViewUriUtil;

/* loaded from: input_file:org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.class */
/**
 * Character-level sentence detector built on a ClearTK classifier.
 *
 * <p>Every character of every {@link Segment} is assigned one of the outcomes
 * {@code "B"} (first character of a sentence), {@code "I"} (inside a sentence) or
 * {@code "O"} (outside any sentence). In training mode the outcomes are derived from the
 * {@link Sentence} annotations already in the CAS and written to the data writer; otherwise
 * the classifier predicts them and {@link Sentence} annotations are created from the
 * predicted runs.
 */
public class SentenceDetectorAnnotatorBIO extends CleartkAnnotator<String> {
    /** Number of neighbouring characters inspected on each side of the current character. */
    private static final int WINDOW_SIZE = 3;
    /** Half-width of the triangular kernel used to smooth the line-length histogram. */
    private static final int SMOOTHING_WINDOW = 5;
    public static final String PARAM_FEAT_CONFIG = "FeatureConfiguration";
    public static final String PARAM_TOKEN_FILE = "TokenFilename";
    static CharacterCategoryPatternFunction<Annotation> shapeFun = new CharacterCategoryPatternFunction<>(CharacterCategoryPatternFunction.PatternType.REPEATS_AS_KLEENE_PLUS);
    private final Logger logger = Logger.getLogger(SentenceDetectorAnnotatorBIO.class);

    /** Selects which optional feature groups are generated; see {@link FEAT_CONFIG}. */
    @ConfigurationParameter(name = PARAM_FEAT_CONFIG, mandatory = false)
    private FEAT_CONFIG featConfig = FEAT_CONFIG.CHAR;

    /** Resource path of the token count file read by {@link #initialize(UimaContext)}. */
    @ConfigurationParameter(name = PARAM_TOKEN_FILE, mandatory = false)
    private String tokenCountFile = "org/apache/ctakes/core/sentdetect/tokenCounts.txt";
    /** Token counts loaded from {@link #tokenCountFile}. */
    CounterMap<String> tokenCounts = new CounterMap<>();
    /** Smoothed line-length histogram of the current document (line length to strength). */
    private HashMap<Integer, Double> endCounts = null;
    /** Largest value in {@link #endCounts}, used to normalise the line-position strength. */
    private double maxLineStrength = -1.0d;
    /** Length of the longest line of the current document, ignoring trailing whitespace. */
    private int maxLineLength = -1;

    /**
     * Feature configurations. The constant decides which optional feature groups are added:
     * the character window ({@code CHAR*}), token shape ({@code *SHAPE*}) and line position
     * ({@code *POS}). {@code GILLICK} additionally omits the next-token length feature.
     */
    public enum FEAT_CONFIG {
        GILLICK,
        CHAR,
        SHAPE,
        LINE_POS,
        CHAR_SHAPE,
        CHAR_POS,
        CHAR_SHAPE_POS
    }

    /**
     * Initializes the annotator and loads the token counts.
     *
     * <p>Each line of the token count file is trimmed and split on {@code " : "}; lines that
     * yield exactly two fields are stored as token and integer count, all other lines are
     * ignored.
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the token count file cannot be found
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        // try-with-resources: the scanner is closed even when a count fails to parse.
        try (Scanner scanner = new Scanner(FileLocator.getAsStream(this.tokenCountFile))) {
            while (scanner.hasNextLine()) {
                String[] fields = scanner.nextLine().trim().split(" : ");
                if (fields.length == 2) {
                    this.tokenCounts.put(fields[0], Integer.parseInt(fields[1]));
                }
            }
        } catch (FileNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Processes one document: writes training instances for every segment character when
     * training, otherwise classifies the characters and creates {@link Sentence} annotations.
     *
     * @param jCas the CAS holding the document text and its {@link Segment} annotations
     * @throws AnalysisEngineProcessException if writing an instance or classifying fails
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        try {
            this.logger.info(String.format("Processing file with uri %s", ViewUriUtil.getURI(jCas).toString()));
        } catch (CASRuntimeException e) {
            this.logger.debug("No uri found, probably not a big deal unless this is an evaluation.");
        }
        if (usesPositionFeatures()) {
            // The position features read the per-document line-length model.
            buildDocEndlineModel(jCas);
        }
        final boolean training = isTraining();
        for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
            if (training) {
                writeTrainingInstances(jCas, segment);
            } else {
                detectSentences(jCas, segment);
            }
        }
    }

    /**
     * Writes one training instance per character of the segment, labelled from the
     * {@link Sentence} annotations covered by the segment.
     */
    private void writeTrainingInstances(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
        final List<Sentence> goldSentences = JCasUtil.selectCovered(jCas, Sentence.class, segment);
        int nextGoldIndex = 0;
        Sentence currentGold = goldSentences.isEmpty() ? null : goldSentences.get(nextGoldIndex++);
        final int segmentBegin = segment.getBegin();
        final String segmentText = segment.getCoveredText();
        String prevOutcome = "O";
        for (int pos = 0; pos < segmentText.length(); pos++) {
            final List<Feature> features = extractFeatures(segmentText, pos, prevOutcome);
            final int docOffset = segmentBegin + pos;
            // Advance to the next gold sentence once the current one lies behind this offset.
            while (currentGold != null && currentGold.getEnd() < docOffset && nextGoldIndex < goldSentences.size()) {
                currentGold = goldSentences.get(nextGoldIndex++);
            }
            // NOTE(review): once the last gold sentence has been reached it stays current, so
            // offsets past its end are still labelled B/I rather than O. Left unchanged because
            // existing models were presumably trained with these labels — confirm before changing.
            final String outcome;
            if (currentGold == null || docOffset < currentGold.getBegin()) {
                outcome = "O";
            } else if (prevOutcome.equals("O")) {
                outcome = "B";
            } else {
                outcome = "I";
            }
            this.dataWriter.write(new Instance<String>(outcome, features));
            prevOutcome = outcome;
        }
    }

    /**
     * Classifies the characters of the segment and creates a {@link Sentence} for every
     * predicted run of non-"O" outcomes.
     *
     * <p>Letters and digits that follow a non-"O" outcome are labelled "I" without consulting
     * the classifier, so a sentence can only end on a non-alphanumeric character.
     */
    private void detectSentences(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
        final int segmentBegin = segment.getBegin();
        final String segmentText = segment.getCoveredText();
        // Start inside the segment so that a sentence can never reach back before it.
        int sentenceStart = segmentBegin;
        String prevOutcome = "O";
        for (int pos = 0; pos < segmentText.length(); pos++) {
            final int docOffset = segmentBegin + pos;
            final boolean prevOutside = prevOutcome.equals("O");
            final String outcome;
            if (!prevOutside && Character.isLetterOrDigit(segmentText.charAt(pos))) {
                outcome = "I";
            } else {
                outcome = this.classifier.classify(extractFeatures(segmentText, pos, prevOutcome));
                final boolean prevInside = prevOutcome.equals("I") || prevOutcome.equals("B");
                if (outcome.equals("B")) {
                    if (prevInside) {
                        // A new sentence begins while one is still open: emit the open one
                        // instead of silently dropping it.
                        emitSentence(jCas, segmentText, segmentBegin, sentenceStart, docOffset);
                    }
                    sentenceStart = docOffset;
                } else if (outcome.equals("I") && prevOutside) {
                    // The classifier opened a sentence with "I"; treat this offset as its start
                    // rather than reusing the start of an earlier sentence.
                    sentenceStart = docOffset;
                } else if (outcome.equals("O") && prevInside) {
                    emitSentence(jCas, segmentText, segmentBegin, sentenceStart, docOffset);
                }
            }
            prevOutcome = outcome;
        }
        if (!prevOutcome.equals("O")) {
            // The segment ended while a sentence was still open.
            makeSentence(jCas, sentenceStart, segment.getEnd());
        }
    }

    /**
     * Creates a sentence for the document offsets {@code [start, end)} after stripping trailing
     * whitespace; nothing is created when no characters remain.
     */
    private static void emitSentence(JCas jCas, String segmentText, int segmentBegin, int start, int end) {
        int trimmedEnd = end;
        while (trimmedEnd > start && Character.isWhitespace(segmentText.charAt((trimmedEnd - segmentBegin) - 1))) {
            trimmedEnd--;
        }
        if (trimmedEnd > start) {
            makeSentence(jCas, start, trimmedEnd);
        }
    }

    /**
     * Builds the feature list for the character at {@code pos} of {@code segmentText}: the
     * previous outcome, the character itself, optionally the surrounding character window,
     * the neighbouring tokens and optionally the line-position features.
     */
    private List<Feature> extractFeatures(String segmentText, int pos, String prevOutcome) {
        final List<Feature> features = new ArrayList<>();
        final char current = segmentText.charAt(pos);
        features.add(new Feature("PrevOutcome", prevOutcome));
        features.addAll(getCharFeatures(current, "Character"));
        if (usesCharWindowFeatures()) {
            for (int offset = -WINDOW_SIZE; offset <= WINDOW_SIZE; offset++) {
                final int neighbour = pos + offset;
                if (neighbour >= 0 && neighbour < segmentText.length()) {
                    features.addAll(getCharFeatures(segmentText.charAt(neighbour), "CharOffset_" + offset));
                }
            }
        }
        final String nextToken = getNextToken(segmentText, pos);
        final String prevToken = getPrevToken(segmentText, pos);
        features.addAll(getTokenFeatures(prevToken, nextToken, "Token"));
        if (usesPositionFeatures()) {
            features.addAll(getPositionFeatures(current, pos, segmentText, nextToken));
        }
        return features;
    }

    /** Returns whether the configuration includes the surrounding character window features. */
    private boolean usesCharWindowFeatures() {
        return this.featConfig == FEAT_CONFIG.CHAR
                || this.featConfig == FEAT_CONFIG.CHAR_POS
                || this.featConfig == FEAT_CONFIG.CHAR_SHAPE
                || this.featConfig == FEAT_CONFIG.CHAR_SHAPE_POS;
    }

    /** Returns whether the configuration includes the line-position features. */
    private boolean usesPositionFeatures() {
        return this.featConfig == FEAT_CONFIG.LINE_POS
                || this.featConfig == FEAT_CONFIG.CHAR_POS
                || this.featConfig == FEAT_CONFIG.CHAR_SHAPE_POS;
    }

    /** Returns whether the configuration includes the token shape features. */
    private boolean usesShapeFeatures() {
        return this.featConfig == FEAT_CONFIG.CHAR_SHAPE_POS
                || this.featConfig == FEAT_CONFIG.CHAR_SHAPE
                || this.featConfig == FEAT_CONFIG.SHAPE;
    }

    /**
     * Rebuilds the per-document line-length model: a histogram of line lengths over all
     * segments, smoothed with a triangular kernel into {@link #endCounts}, together with
     * {@link #maxLineLength} and {@link #maxLineStrength}.
     *
     * <p>Lines containing {@code "[**"} or {@code "**]"} are skipped (presumably
     * de-identification placeholders whose length differs from the original text — confirm).
     */
    private void buildDocEndlineModel(JCas jCas) {
        final HashMap<Integer, Double> lineLengthCounts = new HashMap<>();
        this.endCounts = new HashMap<>();
        this.maxLineStrength = -1.0d;
        this.maxLineLength = -1;

        for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
            for (String line : segment.getCoveredText().split("\n+")) {
                if (line.contains("[**") || line.contains("**]")) {
                    continue;
                }
                // The histogram is keyed by the raw line length ...
                final Integer rawLength = line.length();
                final Double seen = lineLengthCounts.get(rawLength);
                lineLengthCounts.put(rawLength, (seen == null ? 0.0d : seen.doubleValue()) + 1.0d);
                // ... while the maximum ignores trailing whitespace.
                final int trimmedLength = line.replaceAll("\\s+$", "").length();
                if (trimmedLength > this.maxLineLength) {
                    this.maxLineLength = trimmedLength;
                }
            }
        }

        // Spread every count over the neighbouring lengths with linearly decaying weight.
        for (Integer lengthKey : lineLengthCounts.keySet()) {
            final int lineLength = lengthKey;
            final double count = lineLengthCounts.get(lengthKey);
            final int upper = lineLength + SMOOTHING_WINDOW;
            for (int column = Math.max(0, (lineLength - SMOOTHING_WINDOW) + 1); column < upper; column++) {
                final Double current = this.endCounts.get(column);
                final double base = current == null ? 0.0d : current.doubleValue();
                final double share = ((SMOOTHING_WINDOW - Math.abs(column - lineLength)) * count) / SMOOTHING_WINDOW;
                this.endCounts.put(column, base + share);
            }
        }

        for (Double strength : this.endCounts.values()) {
            if (strength > this.maxLineStrength) {
                this.maxLineStrength = strength;
            }
        }
    }

    /**
     * Adds a {@link Sentence} to the CAS indexes for the given document offsets after
     * stripping leading and trailing whitespace; nothing is added when the trimmed span is
     * empty.
     *
     * @param jCas the CAS to add the sentence to
     * @param i    begin offset in the document text (inclusive)
     * @param i2   end offset in the document text (exclusive)
     */
    public static void makeSentence(JCas jCas, int i, int i2) {
        final String documentText = jCas.getDocumentText();
        int begin = i;
        int end = i2;
        while (begin < documentText.length() && Character.isWhitespace(documentText.charAt(begin))) {
            begin++;
        }
        while (end > 0 && Character.isWhitespace(documentText.charAt(end - 1))) {
            end--;
        }
        if (begin < end) {
            new Sentence(jCas, begin, end).addToIndexes();
        }
    }

    /**
     * Returns the whitespace-delimited token at {@code index}, or the first token after it
     * when {@code index} points at whitespace. The result is empty when only whitespace
     * follows.
     */
    private static String getNextToken(String text, int index) {
        int start = index;
        // Skip forward over whitespace, then back up to the first character of the token.
        while (start < text.length() && Character.isWhitespace(text.charAt(start))) {
            start++;
        }
        while (start > 0 && !Character.isWhitespace(text.charAt(start - 1))) {
            start--;
        }
        int end = start;
        while (end < text.length() && !Character.isWhitespace(text.charAt(end))) {
            end++;
        }
        return text.substring(start, end);
    }

    /**
     * Returns the whitespace-delimited token that precedes the token at {@code index}. The
     * scan never moves below position 0, so near the start of the text the result is the
     * text found at position 0.
     */
    private static String getPrevToken(String text, int index) {
        int last = index;
        // Move left to the whitespace before the current token, then across that whitespace.
        while (last > 0 && !Character.isWhitespace(text.charAt(last))) {
            last--;
        }
        while (last > 0 && Character.isWhitespace(text.charAt(last))) {
            last--;
        }
        int first = last;
        while (first > 0 && !Character.isWhitespace(text.charAt(first)) && !Character.isWhitespace(text.charAt(first - 1))) {
            first--;
        }
        return text.substring(first, last + 1);
    }

    /**
     * Builds the features describing the tokens around the current character: identities,
     * lengths, capitalisation of the next token, optional shape features, log-scaled token
     * counts and two combined context features.
     */
    private Collection<? extends Feature> getTokenFeatures(String prevToken, String nextToken, String prefix) {
        final List<Feature> features = new ArrayList<>();
        final Feature prevIdentity = new Feature(prefix + "PrevIdentity", prevToken);
        final Feature nextIdentity = new Feature(prefix + "NextIdentity", nextToken);
        features.add(prevIdentity);
        features.add(nextIdentity);

        if (this.featConfig != FEAT_CONFIG.GILLICK) {
            features.add(new Feature(prefix + "NextLength=" + nextToken.length(), true));
        }
        features.add(new Feature(prefix + "PrevLength=" + prevToken.length(), true));

        final boolean nextCapitalized = nextToken.length() > 0 && Character.isUpperCase(nextToken.charAt(0));
        features.add(new Feature(prefix + "cap", nextCapitalized));

        if (usesShapeFeatures()) {
            features.addAll(shapeFun.apply(prevIdentity));
            features.addAll(shapeFun.apply(nextIdentity));
        }

        // Both lookups rely on CounterMap.get returning a non-null count for unseen tokens —
        // TODO confirm against CounterMap.
        final int nextCount = this.tokenCounts.get(nextToken.toLowerCase()).intValue();
        features.add(new Feature(prefix + "_RightLower_" + ((int) Math.round(Math.log(nextCount))), true));

        final String prevDotless = prevToken.endsWith(".") ? prevToken.substring(0, prevToken.length() - 1) : prevToken;
        final int prevCount = this.tokenCounts.get(prevDotless).intValue();
        features.add(new Feature(prefix + "_LeftDotless_" + ((int) Math.round(Math.log(prevCount))), true));

        features.add(new Feature("TokenContextCat_" + prevToken + "_" + nextToken));
        features.add(new Feature("LeftWordRightCap", prevToken + "_" + nextCapitalized));
        return features;
    }

    /**
     * Builds the features describing a single character: its identity (a line feed is
     * represented as {@code "<LF>"}), upper/lower/digit/whitespace flags and its Unicode
     * general category as returned by {@link Character#getType(char)}.
     *
     * @param c   the character to describe
     * @param str prefix prepended to every feature name
     * @return the features in a new list
     */
    public static List<Feature> getCharFeatures(char c, String str) {
        final List<Feature> features = new ArrayList<>();
        final Object identity;
        if (c == '\n') {
            identity = "<LF>";
        } else {
            identity = Character.valueOf(c);
        }
        features.add(new Feature(str + "_Id", identity));
        features.add(new Feature(str + "_Upper", Character.isUpperCase(c)));
        features.add(new Feature(str + "_Lower", Character.isLowerCase(c)));
        features.add(new Feature(str + "_Digit", Character.isDigit(c)));
        features.add(new Feature(str + "_Space", Character.isWhitespace(c)));
        features.add(new Feature(str + "_Type" + Character.getType(c), true));
        return features;
    }

    /**
     * Builds the line-position features for a line feed character, using the line-length
     * model of the current document. Any other character, or a line feed at index 0, yields
     * an empty list.
     *
     * @param c    the current character
     * @param i    index of {@code c} in {@code str}
     * @param str  text of the segment being processed
     * @param str2 the token that follows the current position
     * @return the position features, possibly empty
     */
    public List<Feature> getPositionFeatures(char c, int i, String str, String str2) {
        final List<Feature> features = new ArrayList<>();
        if (c != '\n' || i <= 0) {
            return features;
        }
        // Length of the line that ends at this line feed.
        final int lineLength = (i - str.lastIndexOf('\n', i - 1)) - 1;
        // Exclusive upper bound: the line with a separator and the next token appended.
        final int wrappedEnd = lineLength + 1 + str2.length();

        if (lineLength <= this.maxLineLength && wrappedEnd > this.maxLineLength) {
            features.add(new Feature("NextWordWrapsLine", true));
        }

        final Double wrappedValue = this.endCounts.get(lineLength + str2.length());
        final double wrappedStrength = wrappedValue == null ? 0.0d : wrappedValue.doubleValue();
        for (int column = lineLength; column < wrappedEnd; column++) {
            final Double strength = this.endCounts.get(column);
            if (strength != null && strength.doubleValue() > wrappedStrength) {
                features.add(new Feature("LinePosNextWrapsLocalMax", true));
                break;
            }
        }

        final Double lineStrength = this.endCounts.get(lineLength);
        if (lineStrength != null) {
            features.add(new Feature("LinePosStrength", Double.valueOf(lineStrength.doubleValue() / this.maxLineStrength)));
        }
        return features;
    }

    /**
     * Creates a training-mode description of this annotator with the {@code CHAR} feature
     * configuration.
     *
     * @param file output directory for the training data
     * @param cls  data writer class to use
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDataWriter(File file, Class<? extends DataWriter<?>> cls) throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(
                SentenceDetectorAnnotatorBIO.class,
                "isTraining", true,
                "outputDirectory", file,
                "dataWriterClassName", cls,
                PARAM_FEAT_CONFIG, FEAT_CONFIG.CHAR);
    }

    /**
     * Creates a classification-mode description of this annotator with the {@code CHAR}
     * feature configuration.
     *
     * @param str path of the classifier jar
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDescription(String str) throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(
                SentenceDetectorAnnotatorBIO.class,
                "isTraining", false,
                "classifierJarPath", str,
                PARAM_FEAT_CONFIG, FEAT_CONFIG.CHAR);
    }

    /**
     * Creates a classification-mode description using
     * {@code GenerateSentenceBIODescriptors.sentModelPath} as the classifier jar path.
     *
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
        return getDescription(GenerateSentenceBIODescriptors.sentModelPath);
    }
}
