package org.apache.uima.fit.component;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.CloseShieldOutputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.resource.ResourceInitializationException;
import org.springframework.util.DigestUtils;

/* loaded from: input_file:org/apache/uima/fit/component/CasDumpWriter.class */
/**
 * CAS consumer that writes a plain-text dump of every processed CAS, either to a file or, when the
 * configured file name is {@code -}, to standard output.
 *
 * <p>For each view the dump contains the (optionally filtered) document annotation, the document
 * text and all annotations from the annotation index whose type is selected by the type patterns.
 * Views without document text but with a sofa data stream additionally get a short summary (mime
 * type, size, MD5 hash) of the sofa data.
 *
 * <p>Patterns are regular expressions prefixed with {@link #INCLUDE_PREFIX} or
 * {@link #EXCLUDE_PREFIX}; a pattern without a prefix is treated as an exclude pattern.
 */
public class CasDumpWriter extends CasConsumer_ImplBase {
    /** Prefix marking a pattern whose matches are included in the output. */
    public static final String INCLUDE_PREFIX = "+|";

    /** Prefix marking a pattern whose matches are excluded from the output. */
    public static final String EXCLUDE_PREFIX = "-|";

    /** Name of the parameter holding the output file; the file name {@code -} selects standard output. */
    public static final String PARAM_OUTPUT_FILE = "outputFile";

    @ConfigurationParameter(name = PARAM_OUTPUT_FILE, mandatory = true, defaultValue = {"-"})
    private File outputFile;

    /** Name of the parameter controlling whether the document annotation is dumped. */
    public static final String PARAM_WRITE_DOCUMENT_META_DATA = "writeDocumentMetaData";

    @ConfigurationParameter(name = PARAM_WRITE_DOCUMENT_META_DATA, mandatory = true, defaultValue = {"true"})
    private boolean writeDocumentMetaData;

    /**
     * Name of the parameter holding the include/exclude patterns that are matched against each
     * line of a feature structure's string representation.
     */
    public static final String PARAM_FEATURE_PATTERNS = "featurePatterns";

    @ConfigurationParameter(name = PARAM_FEATURE_PATTERNS, mandatory = true, defaultValue = {"+|.*", "-|^.*documentUri:.*$", "-|^.*collectionId:.*$", "-|^.*documentBaseUri:.*$"})
    private String[] featurePatterns;

    /** Compiled form of {@link #featurePatterns}, built in {@link #initialize(UimaContext)}. */
    private InExPattern[] cookedFeaturePatterns;

    /** Name of the parameter holding the include/exclude patterns that are matched against type names. */
    public static final String PARAM_TYPE_PATTERNS = "typePatterns";

    @ConfigurationParameter(name = PARAM_TYPE_PATTERNS, mandatory = true, defaultValue = {"+|.*"})
    private String[] typePatterns;

    /** Compiled form of {@link #typePatterns}, built in {@link #initialize(UimaContext)}. */
    private InExPattern[] cookedTypePatterns;

    /** Destination of the dump; {@code null} until initialized and again after the collection completes. */
    private PrintWriter out;

    /** Zero-based counter of the CASes dumped so far; used in the begin/end banner lines. */
    private int iCas;

    /**
     * A compiled include/exclude pattern. The matcher is created once and re-targeted with
     * {@link Matcher#reset(CharSequence)} for every candidate string.
     */
    public static class InExPattern {
        /** {@code true} if a match selects the candidate for output, {@code false} if it suppresses it. */
        final boolean includeInOutput;

        /** Reusable matcher for the compiled expression (field name kept as-is, including the typo). */
        final Matcher matchter;

        /**
         * @param str the regular expression, without include/exclude prefix
         * @param z whether a match includes ({@code true}) or excludes ({@code false}) the candidate
         */
        public InExPattern(String str, boolean z) {
            this.includeInOutput = z;
            this.matchter = Pattern.compile(str).matcher("");
        }
    }

    /**
     * Compiles the configured patterns and opens the output destination if it is not open yet.
     *
     * @param uimaContext the context handed to the superclass initialization
     * @throws ResourceInitializationException if the output file cannot be opened
     */
    @Override // org.apache.uima.fit.component.CasConsumer_ImplBase
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        // Compile first so that an invalid expression fails before any file is created or opened.
        this.cookedTypePatterns = compilePatterns(this.typePatterns);
        this.cookedFeaturePatterns = compilePatterns(this.featurePatterns);
        if (this.out != null) {
            return;
        }
        try {
            if ("-".equals(this.outputFile.getName())) {
                // Shield System.out so that closing the writer later does not close standard output.
                this.out = new PrintWriter((OutputStream) new CloseShieldOutputStream(System.out));
            } else {
                File parent = this.outputFile.getParentFile();
                if (parent != null) {
                    parent.mkdirs();
                }
                this.out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(this.outputFile), "UTF-8"));
            }
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Dumps all views of the given CAS, framed by begin/end banner lines carrying the CAS counter,
     * then flushes the output and increments the counter.
     *
     * @param cas the CAS to dump
     * @throws AnalysisEngineProcessException if sofa data of a view cannot be read
     */
    public void process(CAS cas) throws AnalysisEngineProcessException {
        this.out.println("======== CAS " + this.iCas + " begin ==================================");
        this.out.println();
        Iterator<CAS> views = cas.getViewIterator();
        while (views.hasNext()) {
            CAS view = views.next();
            processView(view);
            if (view.getDocumentText() == null) {
                // Obtain the stream exactly once; processSofaData takes ownership and closes it.
                InputStream sofaData = view.getSofaDataStream();
                if (sofaData != null) {
                    processSofaData(view, sofaData);
                }
            }
        }
        this.out.println("======== CAS " + this.iCas + " end ==================================");
        this.out.println();
        this.out.println();
        this.out.flush();
        this.iCas++;
    }

    /** Closes the output destination and forgets it. */
    public void collectionProcessComplete() {
        IOUtils.closeQuietly(this.out);
        this.out = null;
    }

    /** Dumps the document annotation of the view if {@link #PARAM_WRITE_DOCUMENT_META_DATA} is enabled. */
    private void processDocumentMetadata(CAS cas) {
        if (this.writeDocumentMetaData) {
            processFeatureStructure(cas.getDocumentAnnotation());
        }
    }

    /** Writes the "CAS-Text:" header followed by the document text of the view. */
    private void processDocumentText(CAS cas) {
        this.out.println();
        this.out.println("CAS-Text:");
        this.out.println(cas.getDocumentText());
    }

    /**
     * Dumps every annotation of the view's annotation index whose type name was selected by
     * {@link #getTypes(CAS)}: first the covered text in square brackets, then the filtered string
     * representation of the annotation.
     */
    private void processFeatureStructures(CAS cas) {
        Set<String> selectedTypes = getTypes(cas);
        FSIterator<AnnotationFS> annotations = cas.getAnnotationIndex().iterator();
        while (annotations.hasNext()) {
            AnnotationFS annotation = annotations.next();
            if (!selectedTypes.contains(annotation.getType().getName())) {
                continue;
            }
            try {
                this.out.println("[" + annotation.getCoveredText() + "]");
            } catch (IndexOutOfBoundsException e) {
                // Offsets pointing outside the text must not abort the dump.
                this.out.println("<OFFSETS OUT OF BOUNDS>");
            }
            processFeatureStructure(annotation);
        }
    }

    /**
     * Writes the string representation of a feature structure line by line, keeping only lines
     * selected by the feature patterns. All patterns are evaluated in order and the last matching
     * pattern decides; a line matched by no pattern is dropped.
     */
    private void processFeatureStructure(FeatureStructure featureStructure) {
        for (String line : featureStructure.toString().split("\n")) {
            boolean print = false;
            for (InExPattern pattern : this.cookedFeaturePatterns) {
                pattern.matchter.reset(line);
                if (pattern.matchter.matches()) {
                    print = pattern.includeInOutput;
                }
            }
            if (print) {
                this.out.println(line);
            }
        }
    }

    /** Dumps one view: metadata, text and annotations, framed by begin/end banner lines. */
    private void processView(CAS cas) {
        this.out.println("-------- View " + cas.getViewName() + " begin ----------------------------------");
        this.out.println();
        processDocumentMetadata(cas);
        processDocumentText(cas);
        processFeatureStructures(cas);
        this.out.println("-------- View " + cas.getViewName() + " end ----------------------------------");
        this.out.println();
    }

    /**
     * Writes a summary of the view's sofa data: the mime type (if set), the size in bytes and the
     * MD5 hash of the data.
     *
     * @param cas the view the data belongs to
     * @param sofaData the view's sofa data stream; always closed by this method
     * @throws AnalysisEngineProcessException if reading the stream fails
     */
    private void processSofaData(CAS cas, InputStream sofaData) throws AnalysisEngineProcessException {
        byte[] data;
        try {
            this.out.println("Sofa data:");
            String mimeType = cas.getSofaMimeType();
            if (mimeType != null) {
                this.out.println("   mime type:\t" + mimeType);
            }
            data = IOUtils.toByteArray(sofaData);
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        } finally {
            IOUtils.closeQuietly(sofaData);
        }
        this.out.println("   size:\t" + data.length + " byte(s)");
        this.out.println("   hash value:\t" + DigestUtils.md5DigestAsHex(data));
        this.out.println();
    }

    /**
     * Turns prefixed pattern strings into compiled patterns. The prefix is stripped before
     * compiling; a string carrying neither prefix is compiled as a whole and treated as an
     * exclude pattern.
     *
     * @param strArr the configured pattern strings
     * @return one compiled pattern per input string, in the same order
     */
    private static InExPattern[] compilePatterns(String[] strArr) {
        InExPattern[] compiled = new InExPattern[strArr.length];
        for (int i = 0; i < strArr.length; i++) {
            String raw = strArr[i];
            if (raw.startsWith(INCLUDE_PREFIX)) {
                compiled[i] = new InExPattern(raw.substring(INCLUDE_PREFIX.length()), true);
            } else if (raw.startsWith(EXCLUDE_PREFIX)) {
                compiled[i] = new InExPattern(raw.substring(EXCLUDE_PREFIX.length()), false);
            } else {
                compiled[i] = new InExPattern(raw, false);
            }
        }
        return compiled;
    }

    /**
     * Determines the names of the types whose annotations are dumped. The type of the document
     * annotation is never selected here. For every other type of the type system the type
     * patterns are tried in order and the first matching pattern decides whether the type is
     * included or excluded; a type matched by no pattern is not selected.
     *
     * @param cas the view whose type system is inspected
     * @return the names of the selected types
     */
    private Set<String> getTypes(CAS cas) {
        Set<String> selected = new HashSet<String>();
        String documentAnnotationType = cas.getDocumentAnnotation().getType().getName();
        Iterator<Type> types = cas.getTypeSystem().getTypeIterator();
        while (types.hasNext()) {
            String typeName = types.next().getName();
            if (typeName.equals(documentAnnotationType)) {
                continue;
            }
            for (InExPattern pattern : this.cookedTypePatterns) {
                pattern.matchter.reset(typeName);
                if (pattern.matchter.matches()) {
                    if (pattern.includeInOutput) {
                        selected.add(typeName);
                    } else {
                        selected.remove(typeName);
                    }
                    // First match wins; without this exit the scan would never terminate.
                    break;
                }
            }
        }
        return selected;
    }
}
