package org.apache.mahout.clustering.minhash;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.VectorWritable;

/* loaded from: input_file:org/apache/mahout/clustering/minhash/LastfmDataConverter.class */
/**
 * Command-line utility that reads a tab-separated Last.fm data file from the local file system,
 * groups the feature ids seen for every item, and writes the result as a Hadoop
 * {@link SequenceFile} with {@link Text} keys and {@link VectorWritable} values.
 *
 * <p>Progress is reported on {@code System.out} using carriage returns so the same console line
 * is overwritten.
 */
public final class LastfmDataConverter {
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");

    /** Divisor used to report memory in megabytes. */
    private static final long BYTES_PER_MB = 1024L * 1024L;

    /** Both dataset layouts read column index 2, so a usable line has at least three columns. */
    private static final int MIN_FIELDS = 3;

    private static final String CONVERT_MSG = "Converting data to internal vector format: ";
    private static final String WRITE_MSG = "Now writing vectorized data in sequence file format: ";

    /**
     * The supported Last.fm datasets. The number attached to each constant is only used as the
     * denominator of the progress percentage printed while converting.
     */
    public enum Lastfm {
        USERS_360K(17559530),
        USERS_1K(19150868);

        private final int totalRecords;

        Lastfm(int i) {
            this.totalRecords = i;
        }

        /** @return the record count used to compute conversion progress for this dataset */
        int getTotalRecords() {
            return this.totalRecords;
        }
    }

    private LastfmDataConverter() {
    }

    /** @return a label with the JVM memory currently in use (total minus free), in whole MB */
    private static String usedMemory() {
        Runtime runtime = Runtime.getRuntime();
        return "Used Memory: [" + ((runtime.totalMemory() - runtime.freeMemory()) / BYTES_PER_MB) + " MB] ";
    }

    /** Picks the feature column: column 0 for {@code USERS_360K}, column 2 otherwise. */
    private static String getFeature(String[] strArr, Lastfm lastfm) {
        return lastfm == Lastfm.USERS_360K ? strArr[0] : strArr[2];
    }

    /** Picks the item column: column 2 for {@code USERS_360K}, column 0 otherwise. */
    private static String getItem(String[] strArr, Lastfm lastfm) {
        return lastfm == Lastfm.USERS_360K ? strArr[2] : strArr[0];
    }

    /**
     * Resolves a dataset name given on the command line. Accepts the enum constant names
     * ({@code USERS_360K}, {@code USERS_1K}) as well as the spellings advertised by the usage
     * text ({@code Users360K}, {@code Users1K}); matching ignores case and underscores.
     *
     * @throws IllegalArgumentException if the name matches no dataset
     */
    private static Lastfm parseDataset(String name) {
        String wanted = name.trim().replace("_", "");
        for (Lastfm candidate : Lastfm.values()) {
            if (candidate.name().replace("_", "").equalsIgnoreCase(wanted)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException(
            "Unknown dataset [" + name + "]. Must be either 'Users360K' or 'Users1K'");
    }

    /**
     * Reads the tab-separated file at {@code str} and builds, for every item, the list of
     * feature ids that occur with it. Feature ids are assigned sequentially, starting at 1, in
     * order of first appearance. A feature id is appended once per input line, so repeated
     * (item, feature) pairs produce repeated ids.
     *
     * <p>Lines with fewer than three tab-separated columns (for example blank lines) are
     * skipped instead of aborting the conversion; they still count towards the progress shown.
     *
     * @param str path of the local input file, read as UTF-8
     * @param lastfm dataset layout, which decides the item and feature columns
     * @return map from item to the feature ids collected for it; empty if no usable line exists
     * @throws IOException if the file cannot be opened or read
     */
    public static Map<String, List<Integer>> convertToItemFeatures(String str, Lastfm lastfm) throws IOException {
        long totalRecords = lastfm.getTotalRecords();
        Map<String, Integer> featureIds = new HashMap<>();
        Map<String, List<Integer>> itemFeatures = new HashMap<>();
        BufferedReader reader = Files.newReader(new File(str), Charsets.UTF_8);
        try {
            System.out.print(usedMemory() + CONVERT_MSG);
            int nextPercent = 1;
            double percentDone = 0.0d;
            long linesRead = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                // Count each line exactly once so the percentage reflects real progress.
                linesRead++;
                String[] fields = TAB_PATTERN.split(line);
                if (fields.length >= MIN_FIELDS) {
                    String feature = getFeature(fields, lastfm);
                    String item = getItem(fields, lastfm);

                    Integer featureId = featureIds.get(feature);
                    if (featureId == null) {
                        featureId = Integer.valueOf(featureIds.size() + 1);
                        featureIds.put(feature, featureId);
                    }

                    List<Integer> features = itemFeatures.get(item);
                    if (features == null) {
                        features = Lists.newArrayList();
                        itemFeatures.put(item, features);
                    }
                    features.add(featureId);
                }

                percentDone = (linesRead * 100.0d) / totalRecords;
                // Build the progress text only when it is actually printed.
                if (percentDone > nextPercent) {
                    System.out.print('\r' + usedMemory() + CONVERT_MSG + percentDone + '%');
                    nextPercent++;
                }
            }
            System.out.print('\r' + usedMemory() + CONVERT_MSG + percentDone + "% Completed\n");
            return itemFeatures;
        } finally {
            // Failures while closing a reader are not interesting; swallow them.
            Closeables.close(reader, true);
        }
    }

    /**
     * Writes every map entry to a sequence file at {@code path}, creating the parent directory
     * first. The key is the item; the value is a {@link SequentialAccessSparseVector} whose
     * element {@code k} holds the {@code k}-th feature id of that item.
     *
     * @param map item to feature-id lists, as produced by {@link #convertToItemFeatures}
     * @param path destination file on the file system given by the default {@link Configuration}
     * @return always {@code true}; failures are reported by exception
     * @throws IOException if the file system cannot be accessed or written
     */
    public static boolean writeToSequenceFile(Map<String, List<Integer>> map, Path path) throws IOException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(configuration);
        fileSystem.mkdirs(path.getParent());
        long total = map.size();
        SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, configuration, path, Text.class, VectorWritable.class);
        try {
            System.out.print(WRITE_MSG);
            Text text = new Text();
            VectorWritable vectorWritable = new VectorWritable();
            long written = 0;
            int nextPercent = 1;
            for (Map.Entry<String, List<Integer>> entry : map.entrySet()) {
                List<Integer> features = entry.getValue();
                text.set(entry.getKey());
                SequentialAccessSparseVector vector = new SequentialAccessSparseVector(features.size());
                int index = 0;
                for (Integer featureId : features) {
                    vector.setQuick(index++, featureId.intValue());
                }
                vectorWritable.set(vector);
                writer.append(text, vectorWritable);

                written++;
                boolean last = written == total;
                double percentDone = (written * 100.0d) / total;
                // Always report the final entry: by then the threshold has usually reached 100,
                // so a strict "greater than" test alone would never print the completion line.
                if (last || percentDone > nextPercent) {
                    System.out.print('\r' + WRITE_MSG + percentDone + "% " + (last ? "Completed\n" : ""));
                    nextPercent++;
                }
            }
            return true;
        } finally {
            Closeables.close(writer, false);
        }
    }

    /**
     * Entry point: {@code LastfmDataConverter <input> <output> <dataset>}. Prints usage and
     * returns when fewer than three arguments are given.
     *
     * @throws IllegalArgumentException if the dataset name is not recognised
     * @throws IllegalStateException if the input file yields no data
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 3) {
            System.out.println("[Usage]: LastfmDataConverter <input> <output> <dataset>");
            System.out.println("   <input>: Absolute path to the local file [usersha1-artmbid-artname-plays.tsv] ");
            System.out.println("  <output>: Absolute path to the HDFS output file");
            System.out.println(" <dataset>: Either of the two Lastfm public datasets. Must be either 'Users360K' or 'Users1K'");
            System.out.println("Note:- Hadoop configuration pointing to HDFS namenode should be in classpath");
            return;
        }
        // Resolve the dataset before touching the input so a bad name fails fast.
        Lastfm dataset = parseDataset(strArr[2]);
        Map<String, List<Integer>> itemFeatures = convertToItemFeatures(strArr[0], dataset);
        if (itemFeatures.isEmpty()) {
            throw new IllegalStateException("Error converting the data file: [" + strArr[0] + ']');
        }
        Path path = new Path(strArr[1]);
        if (writeToSequenceFile(itemFeatures, path)) {
            System.out.println("Data converted and written successfully to HDFS location: [" + path + ']');
        } else {
            System.err.println("Error writing the converted data to HDFS location: [" + path + ']');
        }
    }
}
