package org.apache.mahout.clustering.meanshift;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.common.kernel.IKernelProfile;
import org.apache.mahout.math.VectorWritable;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.class */
/**
 * Driver for mean shift canopy clustering. It can be launched from the command line through
 * {@link ToolRunner} or invoked programmatically through the static {@code run},
 * {@code createCanopyFromVectors}, {@code buildClusters} and {@code clusterData} methods.
 *
 * <p>Every stage has two implementations: one that submits Hadoop MapReduce jobs and one that
 * runs sequentially inside the calling JVM. The {@code runSequential} flag selects between them.
 */
public class MeanShiftCanopyDriver extends AbstractJob {
    /** Hadoop configuration key holding the number of reduce tasks. */
    public static final String MAPRED_REDUCE_TASKS = "mapred.reduce.tasks";
    private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class);
    /** Name of the command-line flag stating that the input already consists of canopies. */
    public static final String INPUT_IS_CANOPIES_OPTION = "inputIsCanopies";
    /** Configuration key under which the clusters path is handed to the clustering job. */
    public static final String STATE_IN_KEY = "org.apache.mahout.clustering.meanshift.stateInKey";
    /** Path, relative to the output directory, whose existence is read as "converged". */
    private static final String CONTROL_CONVERGED = "control/converged";

    /**
     * Command-line entry point; delegates to {@link ToolRunner}.
     *
     * @param args the command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new MeanShiftCanopyDriver(), args);
    }

    /**
     * Registers the supported options, parses the arguments and runs the clustering.
     *
     * @param args the command-line arguments
     * @return {@code -1} when the arguments could not be parsed, {@code 0} otherwise
     * @throws Exception if option values are malformed or the clustering fails
     */
    @Override
    public int run(String[] args) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.convergenceOption().create());
        addOption(DefaultOptionCreator.maxIterationsOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(DefaultOptionCreator.inputIsCanopiesOption().create());
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.kernelProfileOption().create());
        addOption(DefaultOptionCreator.t1Option().create());
        addOption(DefaultOptionCreator.t2Option().create());
        addOption(DefaultOptionCreator.clusteringOption().create());
        addOption(DefaultOptionCreator.methodOption().create());
        if (parseArguments(args) == null) {
            return -1;
        }

        Path input = getInputPath();
        Path output = getOutputPath();
        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(getConf(), output);
        }

        String measureClassName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
        String kernelProfileClassName = getOption(DefaultOptionCreator.KERNEL_PROFILE_OPTION);
        double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
        double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
        boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
        DistanceMeasure measure =
            (DistanceMeasure) ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);
        IKernelProfile kernelProfile =
            (IKernelProfile) ClassUtils.instantiateAs(kernelProfileClassName, IKernelProfile.class);
        double convergenceDelta =
            Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
        int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
        boolean inputIsCanopies = hasOption(INPUT_IS_CANOPIES_OPTION);
        boolean runSequential = getOption("method").equalsIgnoreCase("sequential");

        run(getConf(), input, output, measure, kernelProfile, t1, t2, convergenceDelta,
            maxIterations, inputIsCanopies, runClustering, runSequential);
        return 0;
    }

    /**
     * Runs the complete pipeline: optional conversion of input vectors into initial canopies,
     * iterative cluster building, and optional assignment of the points to the final clusters
     * (written to {@code clusteredPoints} under the output directory).
     *
     * @param conf the configuration used for file system access and job submission
     * @param input the input directory
     * @param output the output directory
     * @param measure the distance measure
     * @param kernelProfile the kernel profile
     * @param t1 the T1 threshold
     * @param t2 the T2 threshold
     * @param convergenceDelta the convergence delta
     * @param maxIterations the maximum number of iterations to run
     * @param inputIsCanopies if {@code true} the input is used as-is; otherwise initial canopies
     *        are first created from it
     * @param runClustering if {@code true} the points are clustered after the clusters are built
     * @param runSequential if {@code true} run in-process instead of submitting MapReduce jobs
     * @throws IOException on file system failure
     * @throws InterruptedException if a MapReduce job is interrupted or does not complete
     *         successfully
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure,
            IKernelProfile kernelProfile, double t1, double t2, double convergenceDelta,
            int maxIterations, boolean inputIsCanopies, boolean runClustering,
            boolean runSequential)
            throws IOException, InterruptedException, ClassNotFoundException {
        Path clustersIn;
        if (inputIsCanopies) {
            clustersIn = input;
        } else {
            clustersIn = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
            createCanopyFromVectors(conf, input, clustersIn, measure, runSequential);
        }
        Path clustersOut = buildClusters(conf, clustersIn, output, measure, kernelProfile, t1, t2,
            convergenceDelta, maxIterations, runSequential, runClustering);
        if (runClustering) {
            clusterData(conf, clustersIn, clustersOut, new Path(output, "clusteredPoints"),
                runSequential);
        }
    }

    /**
     * Converts input vectors into initial canopies.
     *
     * @param conf the configuration to use
     * @param input the directory holding the input vectors
     * @param output the directory receiving the initial canopies
     * @param measure the distance measure
     * @param runSequential if {@code true} run in-process, otherwise as a MapReduce job
     * @throws IOException on file system failure
     * @throws InterruptedException if the MapReduce job is interrupted or fails
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void createCanopyFromVectors(Configuration conf, Path input, Path output,
            DistanceMeasure measure, boolean runSequential)
            throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            createCanopyFromVectorsSeq(conf, input, output, measure);
        } else {
            createCanopyFromVectorsMR(conf, input, output, measure);
        }
    }

    /**
     * Sequential canopy creation: for each input file (as filtered by
     * {@link PathFilters#logsCRCFilter()}) writes one {@code part-m-N} file holding an initial
     * canopy per input vector. Canopy ids are numbered consecutively across all input files.
     */
    private static void createCanopyFromVectorsSeq(Configuration conf, Path input, Path output,
            DistanceMeasure measure) throws IOException {
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        int part = 0;
        int canopyId = 0;
        for (FileStatus status : fs.listStatus(input, PathFilters.logsCRCFilter())) {
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
                new Path(output, "part-m-" + part++), Text.class, MeanShiftCanopy.class);
            // Only swallow a close() failure when the body already threw, so that a failed
            // close after a successful write is reported instead of silently losing data.
            boolean threw = true;
            try {
                for (VectorWritable value
                        : new SequenceFileValueIterable<VectorWritable>(status.getPath(), conf)) {
                    writer.append(new Text(),
                        MeanShiftCanopy.initialCanopy(value.get(), canopyId++, measure));
                }
                threw = false;
            } finally {
                Closeables.close(writer, threw);
            }
        }
    }

    /**
     * MapReduce canopy creation: a map-only job running {@link MeanShiftCanopyCreatorMapper}.
     * Stores the distance measure class name in {@code conf} before the job is created.
     */
    private static void createCanopyFromVectorsMR(Configuration conf, Path input, Path output,
            DistanceMeasure measure)
            throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.kmeans.measure", measure.getClass().getName());
        Job job = new Job(conf);
        job.setJarByClass(MeanShiftCanopyDriver.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MeanShiftCanopy.class);
        job.setMapperClass(MeanShiftCanopyCreatorMapper.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException(
                "Mean Shift createCanopyFromVectorsMR failed on input " + input);
        }
    }

    /**
     * Iterates over the canopies until convergence is reported or {@code maxIterations} is
     * reached, writing each iteration to its own directory under {@code output}.
     *
     * @param conf the configuration to use
     * @param clustersIn the directory holding the initial canopies
     * @param output the output directory
     * @param measure the distance measure
     * @param kernelProfile the kernel profile
     * @param t1 the T1 threshold
     * @param t2 the T2 threshold
     * @param convergenceDelta the convergence delta
     * @param maxIterations the maximum number of iterations to run
     * @param runSequential if {@code true} run in-process, otherwise as MapReduce jobs
     * @param runClustering forwarded to the clusterer / reducer configuration
     * @return the path of the last iteration's directory after it has been renamed with
     *         {@link Cluster#FINAL_ITERATION_SUFFIX}
     * @throws IOException on file system failure
     * @throws InterruptedException if a MapReduce job is interrupted or fails
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static Path buildClusters(Configuration conf, Path clustersIn, Path output,
            DistanceMeasure measure, IKernelProfile kernelProfile, double t1, double t2,
            double convergenceDelta, int maxIterations, boolean runSequential,
            boolean runClustering)
            throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            return buildClustersSeq(conf, clustersIn, output, measure, kernelProfile, t1, t2,
                convergenceDelta, maxIterations, runClustering);
        }
        return buildClustersMR(conf, clustersIn, output, measure, kernelProfile, t1, t2,
            convergenceDelta, maxIterations, runClustering);
    }

    /**
     * Sequential cluster building: merges all input canopies in memory, then repeatedly calls
     * the clusterer, writing every iteration's canopies to {@code part-r-00000} in that
     * iteration's directory.
     */
    private static Path buildClustersSeq(Configuration conf, Path clustersIn, Path output,
            DistanceMeasure measure, IKernelProfile kernelProfile, double t1, double t2,
            double convergenceDelta, int maxIterations, boolean runClustering)
            throws IOException {
        MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure, kernelProfile,
            t1, t2, convergenceDelta, runClustering);
        List<MeanShiftCanopy> clusters = Lists.newArrayList();
        for (MeanShiftCanopy canopy : new SequenceFileDirValueIterable<MeanShiftCanopy>(
                clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            clusterer.mergeCanopy(canopy, clusters);
        }

        FileSystem fs = FileSystem.get(output.toUri(), conf);
        // One-element array: the clusterer reports convergence by writing into it.
        boolean[] converged = {false};
        int iteration = 1;
        while (!converged[0] && iteration <= maxIterations) {
            log.info("Mean Shift Iteration: {}", iteration);
            clusters = clusterer.iterate(clusters, converged);
            Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
                new Path(clustersOut, "part-r-00000"), Text.class, MeanShiftCanopy.class);
            boolean threw = true;
            try {
                for (MeanShiftCanopy cluster : clusters) {
                    if (log.isDebugEnabled()) {
                        log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}",
                            new Object[]{
                                cluster.getId(),
                                AbstractCluster.formatVector(cluster.getCenter(), null),
                                cluster.getNumPoints(),
                                AbstractCluster.formatVector(cluster.getRadius(), null),
                                clustersOut.getName()});
                    }
                    writer.append(new Text(cluster.getIdentifier()), cluster);
                }
                threw = false;
            } finally {
                Closeables.close(writer, threw);
            }
            iteration++;
        }
        return markFinalIteration(conf, output, iteration - 1);
    }

    /**
     * MapReduce cluster building: runs one job per iteration, feeding each iteration's output
     * into the next, until the control file appears or {@code maxIterations} is reached.
     * After every iteration the reducer count stored in {@code conf} is decremented while it is
     * greater than one.
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path output,
            DistanceMeasure measure, IKernelProfile kernelProfile, double t1, double t2,
            double convergenceDelta, int maxIterations, boolean runClustering)
            throws IOException, InterruptedException, ClassNotFoundException {
        Path controlOut = new Path(output, CONTROL_CONVERGED);
        FileSystem fs = FileSystem.get(output.toUri(), conf);
        Path clustersIn = input;
        boolean converged = false;
        int iteration = 1;
        while (!converged && iteration <= maxIterations) {
            int numReducers = Integer.parseInt(conf.get(MAPRED_REDUCE_TASKS, "1"));
            log.info("Mean Shift Iteration: {}, numReducers {}", iteration, numReducers);
            Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
            runIterationMR(conf, clustersIn, clustersOut, controlOut,
                measure.getClass().getName(), kernelProfile.getClass().getName(),
                t1, t2, convergenceDelta, runClustering);
            // The presence of the control file is the convergence signal.
            converged = fs.exists(controlOut);
            clustersIn = clustersOut;
            iteration++;
            if (numReducers > 1) {
                conf.set(MAPRED_REDUCE_TASKS, String.valueOf(numReducers - 1));
            }
        }
        return markFinalIteration(conf, output, iteration - 1);
    }

    /**
     * Renames the directory of the given iteration so that it carries
     * {@link Cluster#FINAL_ITERATION_SUFFIX}, and returns the renamed path. A failed rename is
     * logged as a warning; the target path is returned regardless.
     */
    private static Path markFinalIteration(Configuration conf, Path output, int lastIteration)
            throws IOException {
        Path lastClusters = new Path(output, Cluster.CLUSTERS_DIR + lastIteration);
        Path finalClusters = new Path(output,
            Cluster.CLUSTERS_DIR + lastIteration + Cluster.FINAL_ITERATION_SUFFIX);
        if (!FileSystem.get(output.toUri(), conf).rename(lastClusters, finalClusters)) {
            log.warn("Unable to rename {} to {}", lastClusters, finalClusters);
        }
        return finalClusters;
    }

    /**
     * Runs a single mean shift iteration as a MapReduce job using
     * {@link MeanShiftCanopyMapper} and {@link MeanShiftCanopyReducer}. All algorithm parameters
     * are written into {@code conf} before the job is created.
     */
    private static void runIterationMR(Configuration conf, Path input, Path output, Path control,
            String measureClassName, String kernelProfileClassName, double t1, double t2,
            double convergenceDelta, boolean runClustering)
            throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.canopy.measure", measureClassName);
        conf.set(MeanShiftCanopyConfigKeys.KERNEL_PROFILE_KEY, kernelProfileClassName);
        conf.set(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY,
            String.valueOf(convergenceDelta));
        conf.set("org.apache.mahout.clustering.canopy.t1", String.valueOf(t1));
        conf.set("org.apache.mahout.clustering.canopy.t2", String.valueOf(t2));
        conf.set(MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, control.toString());
        conf.set(MeanShiftCanopyConfigKeys.CLUSTER_POINTS_KEY, String.valueOf(runClustering));

        Job job = new Job(conf, "Mean Shift Driver running runIteration over input: " + input);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MeanShiftCanopy.class);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setMapperClass(MeanShiftCanopyMapper.class);
        job.setReducerClass(MeanShiftCanopyReducer.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(MeanShiftCanopyDriver.class);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Mean Shift Iteration failed on input " + input);
        }
    }

    /**
     * Assigns the input canopies to the final clusters using a freshly created default
     * {@link Configuration}. Retained for existing callers; prefer
     * {@link #clusterData(Configuration, Path, Path, Path, boolean)} so that caller-supplied
     * settings are honoured.
     *
     * @param input the directory holding the canopies to assign
     * @param clustersIn the directory holding the final clusters
     * @param output the directory receiving the clustered points
     * @param runSequential if {@code true} run in-process, otherwise as a MapReduce job
     * @throws IOException on file system failure
     * @throws InterruptedException if the MapReduce job is interrupted or fails
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void clusterData(Path input, Path clustersIn, Path output,
            boolean runSequential)
            throws IOException, InterruptedException, ClassNotFoundException {
        clusterData(new Configuration(), input, clustersIn, output, runSequential);
    }

    /**
     * Assigns the input canopies to the final clusters.
     *
     * @param conf the configuration used for file system access and job submission; in
     *        MapReduce mode {@link #STATE_IN_KEY} is set on it
     * @param input the directory holding the canopies to assign
     * @param clustersIn the directory holding the final clusters
     * @param output the directory receiving the clustered points
     * @param runSequential if {@code true} run in-process, otherwise as a MapReduce job
     * @throws IOException on file system failure
     * @throws InterruptedException if the MapReduce job is interrupted or fails
     * @throws ClassNotFoundException if a job class cannot be found
     */
    public static void clusterData(Configuration conf, Path input, Path clustersIn, Path output,
            boolean runSequential)
            throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            clusterDataSeq(conf, input, clustersIn, output);
        } else {
            clusterDataMR(conf, input, clustersIn, output);
        }
    }

    /**
     * Sequential clustering: loads all final clusters into memory, then for every canopy in each
     * input file writes the id of its covering cluster together with the canopy center
     * (weight {@code 1.0}) to a matching {@code part-m-N} file.
     */
    private static void clusterDataSeq(Configuration conf, Path input, Path clustersIn,
            Path output) throws IOException {
        List<MeanShiftCanopy> clusters = Lists.newArrayList();
        for (MeanShiftCanopy cluster : new SequenceFileDirValueIterable<MeanShiftCanopy>(
                clustersIn, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
            clusters.add(cluster);
        }

        FileSystem fs = FileSystem.get(input.toUri(), conf);
        int part = 0;
        for (FileStatus status : fs.listStatus(input, PathFilters.logsCRCFilter())) {
            SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
                new Path(output, "part-m-" + part++), IntWritable.class,
                WeightedVectorWritable.class);
            boolean threw = true;
            try {
                for (Pair<Writable, MeanShiftCanopy> record
                        : new SequenceFileIterable<Writable, MeanShiftCanopy>(
                            status.getPath(), conf)) {
                    MeanShiftCanopy canopy = record.getSecond();
                    MeanShiftCanopy covering =
                        MeanShiftCanopyClusterer.findCoveringCanopy(canopy, clusters);
                    writer.append(new IntWritable(covering.getId()),
                        new WeightedVectorWritable(1.0d, canopy.getCenter()));
                }
                threw = false;
            } finally {
                Closeables.close(writer, threw);
            }
        }
    }

    /**
     * MapReduce clustering: a map-only job running {@link MeanShiftCanopyClusterMapper}, with
     * the clusters path passed through {@link #STATE_IN_KEY}.
     */
    private static void clusterDataMR(Configuration conf, Path input, Path clustersIn,
            Path output) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set(STATE_IN_KEY, clustersIn.toString());
        Job job = new Job(conf, "Mean Shift Driver running clusterData over input: " + input);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WeightedVectorWritable.class);
        job.setMapperClass(MeanShiftCanopyClusterMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setJarByClass(MeanShiftCanopyDriver.class);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException(
                "Mean Shift Clustering failed on clustersIn " + clustersIn);
        }
    }
}
