package org.apache.mahout.clustering.fuzzykmeans;

import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.ClusterObservations;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasure;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.class */
/**
 * Driver for fuzzy k-means clustering.
 *
 * <p>The driver first refines an initial set of clusters iteratively (either in-process or as a
 * chain of MapReduce jobs) and then, optionally, assigns the input points to the final clusters.
 * It can be launched from the command line through {@link #main(String[])} or called
 * programmatically through the static {@code run}, {@code buildClusters} and {@code clusterData}
 * methods.
 */
public class FuzzyKMeansDriver extends AbstractJob {
    /** Name (long and short) of the command-line option carrying the fuzziness coefficient. */
    public static final String M_OPTION = "m";
    private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansDriver.class);

    /**
     * Command-line entry point; hands the arguments to {@link ToolRunner}.
     *
     * @param strArr the raw command-line arguments
     * @throws Exception if the job fails
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new FuzzyKMeansDriver(), strArr);
    }

    /**
     * Parses the command-line options and runs the clustering.
     *
     * <p>All options are parsed and validated before the output directory is touched, so a
     * malformed argument never costs the caller an existing output.
     *
     * @param args the raw command-line arguments
     * @return {@code -1} when argument parsing yields nothing to run, {@code 0} on completion
     * @throws IllegalArgumentException if the {@code m} option is not greater than 1
     * @throws Exception if parsing, seeding or clustering fails
     */
    public int run(String[] args) throws Exception {
        addInputOption();
        addOutputOption();
        addOption(DefaultOptionCreator.distanceMeasureOption().create());
        addOption(DefaultOptionCreator.clustersInOption().withDescription("The input centroids, as Vectors.  Must be a SequenceFile of Writable, Cluster/Canopy.  If k is also specified, then a random set of vectors will be selected and written out to this path first").create());
        addOption(DefaultOptionCreator.numClustersOption().withDescription("The k in k-Means.  If specified, then a random selection of k Vectors will be chosen as the Centroid and written to the clusters input path.").create());
        addOption(DefaultOptionCreator.convergenceOption().create());
        addOption(DefaultOptionCreator.maxIterationsOption().create());
        addOption(DefaultOptionCreator.overwriteOption().create());
        addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);
        addOption(DefaultOptionCreator.clusteringOption().create());
        addOption(DefaultOptionCreator.emitMostLikelyOption().create());
        addOption(DefaultOptionCreator.thresholdOption().create());
        addOption(DefaultOptionCreator.methodOption().create());
        if (parseArguments(args) == null) {
            return -1;
        }

        Path input = getInputPath();
        Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
        Path output = getOutputPath();
        String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
        if (measureClass == null) {
            measureClass = SquaredEuclideanDistanceMeasure.class.getName();
        }
        double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
        float m = Float.parseFloat(getOption(M_OPTION));
        // Written as a negated comparison so that NaN is rejected as well.
        if (!(m > 1.0f)) {
            throw new IllegalArgumentException("Option " + M_OPTION + " must be greater than 1, but was " + m);
        }
        int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
        boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
        double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
        DistanceMeasure measure = (DistanceMeasure) ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
        // Constant-first comparison: a missing method option selects MapReduce instead of failing.
        boolean runSequential = "sequential".equalsIgnoreCase(getOption(DefaultOptionCreator.METHOD_OPTION));

        // Only now, with every argument known to be well-formed, remove any previous output.
        if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
            HadoopUtil.delete(getConf(), output);
        }
        if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
            int k = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
            clustersIn = RandomSeedGenerator.buildRandom(getConf(), input, clustersIn, k, measure);
        }
        run(getConf(), input, clustersIn, output, measure, convergenceDelta, maxIterations, m,
            hasOption(DefaultOptionCreator.CLUSTERING_OPTION), emitMostLikely, threshold, runSequential);
        return 0;
    }

    /**
     * Builds the clusters and optionally clusters the input points, using a fresh
     * {@link Configuration} for the cluster-building phase.
     *
     * @param input            directory of input points
     * @param clustersIn       directory holding the initial clusters
     * @param output           directory receiving the per-iteration clusters and clustered points
     * @param measure          distance measure to use
     * @param convergenceDelta convergence threshold
     * @param maxIterations    maximum number of iterations
     * @param m                fuzziness coefficient
     * @param runClustering    whether to assign the input points to the final clusters
     * @param emitMostLikely   passed through to the point-clustering phase
     * @param threshold        passed through to the point-clustering phase
     * @param runSequential    {@code true} to run in-process, {@code false} to run MapReduce jobs
     * @throws IOException            if reading or writing fails
     * @throws ClassNotFoundException if a MapReduce job cannot resolve its classes
     * @throws InterruptedException   if a MapReduce job is interrupted or reports failure
     */
    public static void run(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m, boolean runClustering, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        run(new Configuration(), input, clustersIn, output, measure, convergenceDelta, maxIterations, m,
            runClustering, emitMostLikely, threshold, runSequential);
    }

    /**
     * Runs one MapReduce iteration that recomputes the clusters from the previous ones.
     *
     * @param conf             job configuration; the clustering keys are set on it in place
     * @param input            directory of input points
     * @param clustersIn       directory of the clusters produced by the previous iteration
     * @param clustersOut      directory receiving the recomputed clusters
     * @param measureClass     class name of the distance measure
     * @param convergenceDelta convergence threshold
     * @param m                fuzziness coefficient
     * @return {@code true} if every cluster written by this iteration reports convergence
     * @throws InterruptedException if the job reports failure
     */
    private static boolean runIteration(Configuration conf, Path input, Path clustersIn, Path clustersOut, String measureClass, double convergenceDelta, float m) throws IOException, InterruptedException, ClassNotFoundException {
        conf.set("org.apache.mahout.clustering.kmeans.path", clustersIn.toString());
        conf.set("org.apache.mahout.clustering.kmeans.measure", measureClass);
        conf.set("org.apache.mahout.clustering.kmeans.convergence", String.valueOf(convergenceDelta));
        conf.set(FuzzyKMeansConfigKeys.M_KEY, String.valueOf(m));
        conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, Boolean.toString(true));
        // NOTE(review): NO_NORM belongs to an unrelated similarity API and is presumably used here
        // only for its numeric value (looks like a decompiler constant substitution) - confirm and
        // replace with the intended threshold literal.
        conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, Double.toString(VectorSimilarityMeasure.NO_NORM));

        Job job = new Job(conf, "FuzzyKMeans Driver running runIteration over clustersIn: " + clustersIn);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ClusterObservations.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(SoftCluster.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(FuzzyKMeansMapper.class);
        job.setCombinerClass(FuzzyKMeansCombiner.class);
        job.setReducerClass(FuzzyKMeansReducer.class);
        job.setJarByClass(FuzzyKMeansDriver.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, clustersOut);

        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Fuzzy K-Means Iteration failed processing " + clustersIn);
        }
        return isConverged(clustersOut, conf, FileSystem.get(clustersOut.toUri(), conf));
    }

    /**
     * Builds the clusters with the supplied configuration and optionally clusters the input
     * points into {@code output/clusteredPoints}.
     *
     * @param conf             configuration used for the cluster-building phase
     * @param input            directory of input points
     * @param clustersIn       directory holding the initial clusters
     * @param output           directory receiving the per-iteration clusters and clustered points
     * @param measure          distance measure to use
     * @param convergenceDelta convergence threshold
     * @param maxIterations    maximum number of iterations
     * @param m                fuzziness coefficient
     * @param runClustering    whether to assign the input points to the final clusters
     * @param emitMostLikely   passed through to the point-clustering phase
     * @param threshold        passed through to the point-clustering phase
     * @param runSequential    {@code true} to run in-process, {@code false} to run MapReduce jobs
     * @throws IOException            if reading or writing fails
     * @throws ClassNotFoundException if a MapReduce job cannot resolve its classes
     * @throws InterruptedException   if a MapReduce job is interrupted or reports failure
     */
    public static void run(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m, boolean runClustering, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        Path finalClusters = buildClusters(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m, runSequential);
        if (runClustering) {
            log.info("Clustering");
            clusterData(input, finalClusters, new Path(output, "clusteredPoints"), measure, convergenceDelta, m,
                emitMostLikely, threshold, runSequential);
        }
    }

    /**
     * Iterates over the input points until the clusters converge or {@code maxIterations} is
     * reached, dispatching to the in-process or the MapReduce implementation.
     *
     * @param conf             configuration for the MapReduce implementation (the in-process
     *                         implementation creates its own)
     * @param input            directory of input points
     * @param clustersIn       directory holding the initial clusters
     * @param output           directory receiving one sub-directory of clusters per iteration
     * @param measure          distance measure to use
     * @param convergenceDelta convergence threshold
     * @param maxIterations    maximum number of iterations
     * @param m                fuzziness coefficient
     * @param runSequential    {@code true} to run in-process, {@code false} to run MapReduce jobs
     * @return the directory holding the clusters of the last iteration, renamed with the
     *         final-iteration suffix
     * @throws IOException if reading, writing or the final rename fails
     */
    public static Path buildClusters(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException {
        if (runSequential) {
            return buildClustersSeq(input, clustersIn, output, measure, convergenceDelta, maxIterations, m);
        }
        return buildClustersMR(conf, input, clustersIn, output, measure, convergenceDelta, maxIterations, m);
    }

    /**
     * In-process cluster building: each iteration feeds every input point to the clusterer,
     * tests convergence and writes the current clusters to {@code output/<clusters-dir><n>/part-r-00000}.
     *
     * @throws IllegalStateException if no initial clusters could be loaded from {@code clustersIn}
     * @throws IOException           if reading, writing or the final rename fails
     */
    private static Path buildClustersSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m) throws IOException {
        FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, convergenceDelta, m);
        List<SoftCluster> clusters = Lists.newArrayList();
        FuzzyKMeansUtil.configureWithClusterInfo(clustersIn, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }

        Configuration conf = new Configuration();
        // Clusters are written below 'output', so its file system (not the input's) must be used.
        FileSystem outputFs = output.getFileSystem(conf);
        boolean converged = false;
        int iteration = 1;
        while (!converged && iteration <= maxIterations) {
            log.info("Fuzzy k-Means Iteration: {}", Integer.valueOf(iteration));
            for (VectorWritable point : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
                clusterer.addPointToClusters(clusters, point.get());
            }
            converged = clusterer.testConvergence(clusters);

            Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
            SequenceFile.Writer writer = new SequenceFile.Writer(outputFs, conf, new Path(clustersOut, "part-r-00000"), Text.class, SoftCluster.class);
            boolean threw = true;
            try {
                for (SoftCluster cluster : clusters) {
                    if (log.isDebugEnabled()) {
                        log.debug("Writing Cluster:{} center:{} numPoints:{} radius:{} to: {}", new Object[]{Integer.valueOf(cluster.getId()), AbstractCluster.formatVector(cluster.getCenter(), null), Long.valueOf(cluster.getNumPoints()), AbstractCluster.formatVector(cluster.getRadius(), null), clustersOut.getName()});
                    }
                    writer.append(new Text(cluster.getIdentifier()), cluster);
                }
                threw = false;
            } finally {
                // A failed close on a writer can mean lost data, so it is only swallowed when
                // another exception is already propagating.
                Closeables.close(writer, threw);
            }
            iteration++;
        }
        return markFinalIteration(conf, output, iteration - 1);
    }

    /**
     * MapReduce cluster building: runs one job per iteration, feeding each iteration's output
     * clusters into the next one.
     *
     * @throws IOException if a job's I/O or the final rename fails
     */
    private static Path buildClustersMR(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, int maxIterations, float m) throws IOException, InterruptedException, ClassNotFoundException {
        String measureClass = measure.getClass().getName();
        Path previousClusters = clustersIn;
        boolean converged = false;
        int iteration = 1;
        while (!converged && iteration <= maxIterations) {
            log.info("Fuzzy K-Means Iteration {}", Integer.valueOf(iteration));
            Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
            converged = runIteration(conf, input, previousClusters, clustersOut, measureClass, convergenceDelta, m);
            previousClusters = clustersOut;
            iteration++;
        }
        return markFinalIteration(conf, output, iteration - 1);
    }

    /**
     * Renames the cluster directory of the last iteration so that it carries the
     * final-iteration suffix.
     *
     * @param conf          configuration used to resolve the file system of {@code output}
     * @param output        parent directory of the per-iteration cluster directories
     * @param lastIteration number of the last iteration that was run
     * @return the renamed directory
     * @throws IOException if the rename is reported as unsuccessful
     */
    private static Path markFinalIteration(Configuration conf, Path output, int lastIteration) throws IOException {
        Path lastClusters = new Path(output, Cluster.CLUSTERS_DIR + lastIteration);
        Path finalClusters = new Path(output, Cluster.CLUSTERS_DIR + lastIteration + Cluster.FINAL_ITERATION_SUFFIX);
        // Resolve the file system from the path itself; the default file system may be a different one.
        if (!output.getFileSystem(conf).rename(lastClusters, finalClusters)) {
            throw new IOException("Could not rename " + lastClusters + " to " + finalClusters);
        }
        return finalClusters;
    }

    /**
     * Assigns the input points to the given clusters, in-process or as a MapReduce job.
     *
     * <p>{@code emitMostLikely} and {@code threshold} are forwarded to the MapReduce
     * implementation only; the in-process implementation does not receive them.
     *
     * @param input            directory of input points
     * @param clustersIn       directory holding the clusters to assign points to
     * @param output           directory receiving the clustered points
     * @param measure          distance measure to use
     * @param convergenceDelta convergence threshold
     * @param m                fuzziness coefficient
     * @param emitMostLikely   forwarded to the MapReduce job configuration
     * @param threshold        forwarded to the MapReduce job configuration
     * @param runSequential    {@code true} to run in-process, {@code false} to run a MapReduce job
     * @throws IOException          if reading or writing fails
     * @throws InterruptedException if the MapReduce job is interrupted or reports failure
     */
    public static void clusterData(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m, boolean emitMostLikely, double threshold, boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException {
        if (runSequential) {
            clusterDataSeq(input, clustersIn, output, measure, convergenceDelta, m);
        } else {
            clusterDataMR(input, clustersIn, output, measure, convergenceDelta, m, emitMostLikely, threshold);
        }
    }

    /**
     * In-process point clustering: writes one {@code part-m-<n>} file per input file, numbered
     * from 0 in listing order, so that no input file's results overwrite another's.
     *
     * @throws IllegalStateException if no clusters could be loaded from {@code clustersIn}
     * @throws IOException           if reading or writing fails
     */
    private static void clusterDataSeq(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m) throws IOException {
        FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, convergenceDelta, m);
        List<SoftCluster> clusters = Lists.newArrayList();
        FuzzyKMeansUtil.configureWithClusterInfo(clustersIn, clusters);
        if (clusters.isEmpty()) {
            throw new IllegalStateException("Clusters is empty!");
        }

        Configuration conf = new Configuration();
        FileSystem inputFs = FileSystem.get(input.toUri(), conf);
        // Results go below 'output', which may live on a different file system than the input.
        FileSystem outputFs = output.getFileSystem(conf);
        int part = 0;
        for (FileStatus inputFile : inputFs.listStatus(input, PathFilters.logsCRCFilter())) {
            SequenceFile.Writer writer = new SequenceFile.Writer(outputFs, conf, new Path(output, "part-m-" + part), IntWritable.class, WeightedVectorWritable.class);
            part++;
            boolean threw = true;
            try {
                for (VectorWritable point : new SequenceFileValueIterable<VectorWritable>(inputFile.getPath(), conf)) {
                    clusterer.emitPointToClusters(point, clusters, writer);
                }
                threw = false;
            } finally {
                // Surface close failures unless another exception is already propagating.
                Closeables.close(writer, threw);
            }
        }
    }

    /**
     * MapReduce point clustering: a map-only job over the input points. Any existing
     * {@code output} directory is deleted first.
     *
     * @throws InterruptedException if the job reports failure
     */
    private static void clusterDataMR(Path input, Path clustersIn, Path output, DistanceMeasure measure, double convergenceDelta, float m, boolean emitMostLikely, double threshold) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.set("org.apache.mahout.clustering.kmeans.path", clustersIn.toString());
        conf.set("org.apache.mahout.clustering.kmeans.measure", measure.getClass().getName());
        conf.set("org.apache.mahout.clustering.kmeans.convergence", String.valueOf(convergenceDelta));
        conf.set(FuzzyKMeansConfigKeys.M_KEY, String.valueOf(m));
        conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, Boolean.toString(emitMostLikely));
        conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, Double.toString(threshold));
        output.getFileSystem(conf).delete(output, true);

        Job job = new Job(conf, "FuzzyKMeans Driver running clusterData over input: " + input);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(WeightedVectorWritable.class);
        FileInputFormat.setInputPaths(job, new Path[]{input});
        FileOutputFormat.setOutputPath(job, output);
        job.setMapperClass(FuzzyKMeansClusterMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setJarByClass(FuzzyKMeansDriver.class);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Fuzzy K-Means Clustering failed processing " + clustersIn);
        }
    }

    /**
     * Checks whether every cluster stored in the part files below {@code filePath} reports
     * convergence.
     *
     * @param filePath directory holding the part files of one iteration
     * @param conf     configuration used to open the part files
     * @param fs       file system used to list the part files
     * @return {@code false} as soon as one cluster is not converged, {@code true} otherwise
     *         (including when there are no part files)
     * @throws IOException if listing or reading the part files fails
     */
    private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
        Path partGlob = new Path(filePath, "*");
        List<Path> partFiles = Lists.newArrayList();
        for (FileStatus match : fs.listStatus(FileUtil.stat2Paths(fs.globStatus(partGlob, PathFilters.partFilter())), PathFilters.partFilter())) {
            partFiles.add(fs.makeQualified(match.getPath()));
        }

        for (Path partFile : partFiles) {
            SequenceFileValueIterator<SoftCluster> clusters = new SequenceFileValueIterator<SoftCluster>(partFile, true, conf);
            // The iterator must stay open for the whole file: one try/finally around the loop,
            // not one per element.
            try {
                while (clusters.hasNext()) {
                    if (!clusters.next().isConverged()) {
                        return false;
                    }
                }
            } finally {
                // Read side only: a failure while closing cannot lose data, so it is swallowed.
                Closeables.close(clusters, true);
            }
        }
        return true;
    }
}
