package org.apache.ctakes.core.cr;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.ctakes.core.config.ConfigParameterConstants;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.typesystem.type.structured.DocumentID;
import org.apache.ctakes.typesystem.type.structured.DocumentIdPrefix;
import org.apache.ctakes.typesystem.type.structured.DocumentPath;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/**
 * Collection reader that walks a directory tree and emits one CAS per file found.
 *
 * <p>During {@link #initialize(UimaContext)} the root directory is resolved and the complete list of
 * matching, non-hidden files below it is collected. Each call to {@link #getNext(JCas)} then reads
 * the next file, sets its content as the document text and indexes a {@link DocumentID}, a
 * {@link DocumentIdPrefix} and a {@link DocumentPath} annotation for it.
 */
@PipeBitInfo(name = "Files in Dir Tree Reader", description = "Reads document texts from text files in a directory tree.", role = PipeBitInfo.Role.READER, products = {PipeBitInfo.TypeProduct.DOCUMENT_ID, PipeBitInfo.TypeProduct.DOCUMENT_ID_PREFIX})
public final class FileTreeReader extends JCasCollectionReader_ImplBase {
    private static final Logger LOGGER = Logger.getLogger("FileTreeReader");

    /** Size in bytes of the buffer used when reading a file. */
    private static final int READ_BUFFER_SIZE = 8192;

    /** Path of the root directory, as configured; resolved through {@link FileLocator}. */
    @ConfigurationParameter(name = "InputDirectory", description = ConfigParameterConstants.DESC_INPUTDIR)
    private String _rootDirPath;
    public static final String PARAM_ENCODING = "Encoding";

    /** Optional charset name; when {@code null} the platform default charset is used. */
    @ConfigurationParameter(name = "Encoding", description = "The character encoding used by the input files.", mandatory = false)
    private String _encoding;
    public static final String PARAM_EXTENSIONS = "Extensions";

    /** Optional extensions as configured; normalized into {@link #_validExtensions}. */
    @ConfigurationParameter(name = "Extensions", description = "The extensions of the files that the collection reader will read.  Values for this parameter should not begin with a dot.", mandatory = false)
    private String[] _explicitExtensions;
    private File _rootDir;
    /** Dot-prefixed extensions to accept; empty means "accept every file". */
    private Collection<String> _validExtensions;
    private List<File> _files;
    private int _currentIndex;

    /**
     * Resolves the root directory and collects every file that will be read.
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the root directory cannot be located
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            this._rootDir = FileLocator.locateFile(this._rootDirPath);
            this._validExtensions = createValidExtensions(this._explicitExtensions);
            this._currentIndex = 0;
            this._files = getDescendentFiles(this._rootDir, this._validExtensions);
        } catch (FileNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Normalizes the configured extensions so that each one starts with a dot.
     *
     * @param strArr extensions with or without a leading dot; may be {@code null} or empty
     * @return the dot-prefixed extensions, or an empty collection (meaning "no filtering") when
     *     nothing was given or the single value is the wildcard {@code "*"} or {@code ".*"}
     */
    static Collection<String> createValidExtensions(String... strArr) {
        if (strArr == null || strArr.length == 0) {
            return Collections.emptyList();
        }
        if (strArr.length == 1 && (strArr[0].equals("*") || strArr[0].equals(".*"))) {
            return Collections.emptyList();
        }
        List<String> extensions = new ArrayList<>(strArr.length);
        for (String extension : strArr) {
            extensions.add(extension.startsWith(".") ? extension : "." + extension);
        }
        return extensions;
    }

    /**
     * Recursively collects the acceptable files below a directory.
     *
     * <p>Files directly inside {@code file} come first, followed by the files of each subdirectory
     * in the order the subdirectories were listed.
     *
     * @param file the directory to scan
     * @param collection the dot-prefixed extensions to accept; empty accepts all
     * @return the non-hidden files with a valid extension; empty if the directory cannot be listed
     *     or has no entries
     */
    private static List<File> getDescendentFiles(File file, Collection<String> collection) {
        File[] children = file.listFiles();
        if (children == null || children.length == 0) {
            return Collections.emptyList();
        }
        List<File> childDirs = new ArrayList<>();
        List<File> descendentFiles = new ArrayList<>();
        for (File child : children) {
            if (child.isDirectory()) {
                childDirs.add(child);
            } else if (isExtensionValid(child, collection) && !child.isHidden()) {
                descendentFiles.add(child);
            }
        }
        for (File childDir : childDirs) {
            descendentFiles.addAll(getDescendentFiles(childDir, collection));
        }
        return descendentFiles;
    }

    /**
     * Tells whether a file name ends with one of the accepted extensions.
     *
     * @param file the file to test
     * @param collection the dot-prefixed extensions to accept; empty accepts all
     * @return {@code true} if the collection is empty or the name ends with one of the extensions;
     *     {@code false} otherwise, including when the whole name equals the first matching
     *     extension (a warning is logged in that case)
     */
    static boolean isExtensionValid(File file, Collection<String> collection) {
        if (collection.isEmpty()) {
            return true;
        }
        String name = file.getName();
        for (String extension : collection) {
            if (name.endsWith(extension)) {
                if (!name.equals(extension)) {
                    return true;
                }
                // A file such as ".txt" has an extension but no base name to use as an id.
                LOGGER.warn("File " + file.getPath() + " is named as extension " + extension + " ; discarded");
                return false;
            }
        }
        return false;
    }

    /**
     * Derives the document id from a file name by removing its extension.
     *
     * @param file the file being read
     * @param collection the dot-prefixed extensions that may be stripped
     * @return the name without the longest matching extension; if none matches, the name up to its
     *     last dot; if there is no dot either, the whole name
     */
    static String createDocumentID(File file, Collection<String> collection) {
        String name = file.getName();
        // Prefer the longest match so that e.g. ".tar.gz" wins over ".gz".
        String longestMatch = "";
        for (String extension : collection) {
            if (name.endsWith(extension) && extension.length() > longestMatch.length()) {
                longestMatch = extension;
            }
        }
        int cutIndex = name.lastIndexOf('.');
        if (!longestMatch.isEmpty()) {
            cutIndex = name.length() - longestMatch.length();
        }
        return cutIndex < 0 ? name : name.substring(0, cutIndex);
    }

    /**
     * Computes the path of a file's parent directory relative to the root directory.
     *
     * @param file the file being read
     * @param file2 the root directory
     * @return the relative parent path, or an empty string when the file sits directly in the root
     *     or its parent path does not start with the root path
     */
    private static String createDocumentIdPrefix(File file, File file2) {
        String parent = file.getParent();
        String rootPath = file2.getPath();
        if (parent.equals(rootPath) || !parent.startsWith(rootPath)) {
            return "";
        }
        // Skip the root path and the separator character that follows it.
        return parent.substring(rootPath.length() + 1);
    }

    /**
     * @return the total number of files collected during initialization
     */
    public int getNumberOfDocuments() {
        return this._files.size();
    }

    /**
     * @return {@code true} while there are files that have not been read yet
     */
    public boolean hasNext() {
        return this._currentIndex < this._files.size();
    }

    /**
     * Reads the next file into the given CAS and indexes its id, id prefix and absolute path.
     *
     * @param jCas the CAS to populate
     * @throws IOException if the file cannot be opened or read, or the configured encoding is not
     *     supported
     * @throws CollectionException declared by the reader contract; not thrown by this code directly
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        File file = this._files.get(this._currentIndex);
        this._currentIndex++;
        jCas.setDocumentText(readText(file));
        DocumentID documentID = new DocumentID(jCas);
        documentID.setDocumentID(createDocumentID(file, this._validExtensions));
        documentID.addToIndexes();
        DocumentIdPrefix documentIdPrefix = new DocumentIdPrefix(jCas);
        documentIdPrefix.setDocumentIdPrefix(createDocumentIdPrefix(file, this._rootDir));
        documentIdPrefix.addToIndexes();
        DocumentPath documentPath = new DocumentPath(jCas);
        documentPath.setDocumentPath(file.getAbsolutePath());
        documentPath.addToIndexes();
    }

    /**
     * Reads a whole file and decodes it with the configured encoding, or with the platform default
     * charset when no encoding is configured.
     *
     * @param file the file to read
     * @return the decoded file content
     * @throws IOException if the file cannot be opened or read, or the encoding is not supported
     */
    private String readText(File file) throws IOException {
        ByteArrayOutputStream content = new ByteArrayOutputStream();
        byte[] buffer = new byte[READ_BUFFER_SIZE];
        // try-with-resources closes the stream even when a read fails part-way through.
        try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(file), buffer.length)) {
            int read;
            while ((read = input.read(buffer)) >= 0) {
                content.write(buffer, 0, read);
            }
        } catch (FileNotFoundException e) {
            throw new IOException(e);
        }
        // Decode once over all bytes: decoding chunk by chunk would corrupt any multi-byte
        // character whose bytes straddle a buffer boundary.
        return this._encoding != null ? content.toString(this._encoding) : content.toString();
    }

    /**
     * No-op: files are opened and closed within {@link #getNext(JCas)}.
     */
    public void close() throws IOException {
    }

    /**
     * @return a single progress entry counting files read so far against the total number of files
     */
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this._currentIndex, this._files.size(), "entities")};
    }

    /**
     * Creates a reader over the given directory with default encoding and no extension filter.
     *
     * @param str the input directory path
     * @return the configured collection reader
     * @throws ResourceInitializationException if the reader cannot be created
     */
    public static CollectionReader createReader(String str) throws ResourceInitializationException {
        return CollectionReaderFactory.createReader(FileTreeReader.class, new Object[]{"InputDirectory", str});
    }
}
