package org.apache.mahout.clustering.minhash;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.RandomWrapper;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

/* loaded from: input_file:org/apache/mahout/clustering/minhash/LastfmClusterEvaluator.class */
/**
 * Command-line utility that estimates the precision of a clustering of listeners.
 *
 * <p>It reads a sequence file of (cluster key, vector) records, samples clusters at random,
 * and counts the pairs of members of each sampled cluster whose Jaccard coefficient is at
 * least a given threshold. Results are printed to standard output.
 *
 * <p>This class only has static members and cannot be instantiated.
 */
public final class LastfmClusterEvaluator {
    /** Number of bytes in one megabyte, used when reporting heap usage. */
    private static final long BYTES_PER_MB = 1024L * 1024L;

    private LastfmClusterEvaluator() {
    }

    /**
     * Builds a short progress label with the heap currently in use by this JVM.
     *
     * @return text of the form {@code "Used Memory: [N MB] "}
     */
    private static String usedMemory() {
        Runtime runtime = Runtime.getRuntime();
        return "Used Memory: [" + ((runtime.totalMemory() - runtime.freeMemory()) / BYTES_PER_MB) + " MB] ";
    }

    /**
     * Computes the Jaccard coefficient (|A intersect B| / |A union B|) of two collections of
     * integers, treating each collection as a set (duplicates are ignored).
     *
     * @param iterable  first collection of values
     * @param iterable2 second collection of values
     * @return the coefficient in [0.0, 1.0]; 0.0 when both inputs are empty
     */
    private static double computeSimilarity(Iterable<Integer> iterable, Iterable<Integer> iterable2) {
        HashSet<Integer> first = Sets.newHashSet();
        for (Integer element : iterable) {
            first.add(element);
        }
        HashSet<Integer> second = Sets.newHashSet();
        for (Integer element : iterable2) {
            second.add(element);
        }

        // The intersection must start as a copy of the first set; retaining into an
        // empty set would always yield an empty intersection.
        HashSet<Integer> intersection = new HashSet<Integer>(first);
        intersection.retainAll(second);
        double intersectionSize = intersection.size();

        // "first" is reused as the union; it is not needed in its original form any more.
        first.addAll(second);
        double unionSize = first.size();
        if (unionSize == 0.0d) {
            return 0.0d;
        }
        return intersectionSize / unionSize;
    }

    /**
     * Counts the unordered pairs of members whose similarity reaches the threshold.
     *
     * @param members   the value lists of all members of one cluster
     * @param threshold minimum Jaccard coefficient for a pair to be counted
     * @return number of pairs (i, j), i &lt; j, with similarity &gt;= threshold
     */
    private static long countSimilarPairs(List<List<Integer>> members, double threshold) {
        long similarPairs = 0;
        int size = members.size();
        for (int i = 0; i < size; i++) {
            List<Integer> left = members.get(i);
            for (int j = i + 1; j < size; j++) {
                if (computeSimilarity(left, members.get(j)) >= threshold) {
                    similarPairs++;
                }
            }
        }
        return similarPairs;
    }

    /**
     * Reads the cluster file, evaluates a random sample of its clusters and prints a summary.
     *
     * <p>Consecutive records that share the same key are treated as one cluster, so the
     * input is expected to be grouped by key. A cluster is evaluated when a random draw is
     * not greater than {@code samplePercentage}.
     *
     * @param path             sequence file containing (Text key, VectorWritable) records
     * @param threshold        minimum Jaccard coefficient for two members to count as similar
     * @param samplePercentage fraction of clusters to evaluate, compared against a random
     *                         double in [0, 1)
     */
    private static void testPrecision(Path path, double threshold, double samplePercentage) {
        Configuration configuration = new Configuration();
        RandomWrapper random = RandomUtils.getRandom();
        Text currentCluster = new Text();
        List<List<Integer>> members = Lists.newArrayList();
        long similarPairs = 0;
        long allListeners = 0;
        int clustersProcessed = 0;

        for (Pair<Text, VectorWritable> record
                : new SequenceFileIterable<Text, VectorWritable>(path, true, configuration)) {
            Text cluster = record.getFirst();
            if (!cluster.equals(currentCluster)) {
                // Key changed: the previously accumulated cluster is complete.
                currentCluster.set(cluster.toString());
                // Always draw, so the sampling sequence is independent of cluster contents.
                boolean skipped = random.nextDouble() > samplePercentage;
                // The very first record arrives with nothing accumulated; do not count that.
                if (!skipped && !members.isEmpty()) {
                    allListeners += members.size();
                    similarPairs += countSimilarPairs(members, threshold);
                    clustersProcessed++;
                    System.out.print('\r' + usedMemory() + " Clusters processed: " + clustersProcessed);
                }
                members.clear();
            }
            List<Integer> values = Lists.newArrayList();
            for (Vector.Element element : record.getSecond().get().nonZeroes()) {
                values.add(Integer.valueOf((int) element.get()));
            }
            members.add(values);
        }

        // The last cluster has no following key change to trigger it, so evaluate it here.
        if (!members.isEmpty() && !(random.nextDouble() > samplePercentage)) {
            allListeners += members.size();
            similarPairs += countSimilarPairs(members, threshold);
            clustersProcessed++;
            System.out.print('\r' + usedMemory() + " Clusters processed: " + clustersProcessed);
        }

        System.out.println("\nTest Results");
        System.out.println("=============");
        System.out.println(" (A) Listeners in same cluster with simiarity above threshold (" + threshold + ") : "
                + similarPairs);
        System.out.println(" (B) All listeners: " + allListeners);
        NumberFormat numberFormat = NumberFormat.getInstance();
        numberFormat.setMaximumFractionDigits(2);
        // Divide in floating point (integer division would truncate the ratio) and avoid
        // dividing by zero when nothing was sampled.
        double precision = allListeners == 0 ? 0.0d : ((double) similarPairs / allListeners) * 100.0d;
        System.out.println(" Average cluster precision: A/B = " + numberFormat.format(precision));
    }

    /**
     * Entry point. Expects three arguments: the cluster file path, the similarity threshold
     * and the sample percentage. Prints usage information when fewer are supplied.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        if (strArr.length >= 3) {
            testPrecision(new Path(strArr[0]), Double.parseDouble(strArr[1]), Double.parseDouble(strArr[2]));
            return;
        }
        System.out.println("LastfmClusterEvaluation <cluster-file> <threshold> <sample-percentage>");
        System.out.println("      <cluster-file>: Absolute Path of file containing cluster information in DEBUG format");
        System.out.println("         <threshold>: Minimum threshold for jaccard co-efficient for considering two items");
        System.out.println("                      in a cluster to be really similar. Should be between 0.0 and 1.0");
        System.out.println(" <sample-percentage>: Percentage of clusters to sample. Should be between 0.0 and 1.0");
    }
}
