/*
 * Decompiled with CFR 0.152.
 */
package io.nosqlbench.nbvectors.verifyknn;

import io.jhdf.HdfFile;
import io.nosqlbench.nbvectors.verifyknn.computation.NeighborhoodComparison;
import io.nosqlbench.nbvectors.verifyknn.datatypes.LongIndexedFloatVector;
import io.nosqlbench.nbvectors.verifyknn.datatypes.NeighborIndex;
import io.nosqlbench.nbvectors.verifyknn.datatypes.Neighborhood;
import io.nosqlbench.nbvectors.verifyknn.logging.CustomConfigurationFactory;
import io.nosqlbench.nbvectors.verifyknn.options.ConsoleDiagnostics;
import io.nosqlbench.nbvectors.verifyknn.options.DistanceFunction;
import io.nosqlbench.nbvectors.verifyknn.options.ErrorMode;
import io.nosqlbench.nbvectors.verifyknn.options.Interval;
import io.nosqlbench.nbvectors.verifyknn.options.IntervalParser;
import io.nosqlbench.nbvectors.verifyknn.readers.KNNData;
import io.nosqlbench.nbvectors.verifyknn.statusview.StatusMode;
import io.nosqlbench.nbvectors.verifyknn.statusview.StatusView;
import io.nosqlbench.nbvectors.verifyknn.statusview.StatusViewLanterna;
import io.nosqlbench.nbvectors.verifyknn.statusview.StatusViewNoOp;
import io.nosqlbench.nbvectors.verifyknn.statusview.StatusViewRouter;
import io.nosqlbench.nbvectors.verifyknn.statusview.StatusViewStdout;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Comparator;
import java.util.concurrent.Callable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import picocli.CommandLine;

/**
 * Command line tool which re-computes KNN neighborhoods for query vectors read
 * from an HDF5 file and compares them against the neighborhoods stored in that
 * same file.
 *
 * <p>For every query index in the configured {@link Interval}, the distance from
 * the query vector to every training vector is computed with the selected
 * {@link DistanceFunction}, the {@code K} closest training vectors are kept, and
 * the result is compared with the provided answer-key neighborhood via
 * {@link NeighborhoodComparison}.
 *
 * <p>Exit codes: {@code 0} when no compared neighborhood reported an error,
 * {@code 2} when at least one did.
 */
@CommandLine.Command(
    name="verifyknn",
    headerHeading="Usage:%n%n",
    synopsisHeading="%n",
    descriptionHeading="%nDescription%n%n",
    parameterListHeading="%nParameters:%n",
    optionListHeading="%nOptions:%n",
    header={"self-check KNN test data answer-keys"},
    description={
        "Reads query vectors from HDF5 data, computes KNN neighborhoods, and\n"
        + "compares them against the answer-key data given. This is a pure Java\n"
        + "implementation which requires no other vector processing libraries\n"
        + "or hardware, so it has two key trade-offs with other methods:\n"
        + "1. It is not as fast as a GPU or TPU. It is not expected to be.\n"
        + "2. It is a vastly simpler implementation, which makes it arguably easier\n"
        + "   to rely on as a basic verification tool.\n"
        + "This utility is meant to be used in concert with other tools which are\n"
        + "faster, but which may benefit from the assurance of a basic coherence check.\n"
        + "In essence, if you are not sure your test data is self-correct, then use\n"
        + "this tool to double check it with some sparse sampling.\n"
        + "\n"
        + "The currently supported distance functions and file formats are indicated\n"
        + "by the available command line options.\n"
        + "\n"
        + "The pseudo-standard HDF5 KNN answer-key file format is documented here:\n"
        + "https://github.com/nosqlbench/nbdatatools/blob/main/nbvectors/src/docs/hdf5_vectors.md\n"
    },
    exitCodeListHeading="Exit Codes:%n",
    exitCodeList={"0: all tested neighborhoods were correct", "2: at least one tested neighborhood was incorrect"})
public class CMD_VerifyKNN
implements Callable<Integer> {
    private static final Logger logger = LogManager.getLogger(CMD_VerifyKNN.class);

    /** Exit code returned when no compared neighborhood reported an error. */
    private static final int EXIT_OK = 0;
    /** Exit code returned when at least one compared neighborhood reported an error. */
    private static final int EXIT_INCORRECT = 2;
    /** Smallest buffer limit that is ever selected automatically. */
    private static final int MIN_AUTO_BUFFER_LIMIT = 10;
    /** Once the automatically selected buffer limit reaches this value it stops growing. */
    private static final int MAX_AUTO_BUFFER_LIMIT = 100000;
    /** Upper bound on the size argument handed to the Lanterna status view. */
    private static final int MAX_LANTERNA_SIZE = 3;

    /** The query indices to verify; iterated from {@code min()} (inclusive) to {@code max()} (exclusive). */
    @CommandLine.Option(names={"-i", "--interval"}, converter={IntervalParser.class}, defaultValue="1", description={"The index or closed..open range of indices to test"})
    private Interval interval;

    /** Bound by picocli for usage help; not read by this class directly. */
    @CommandLine.Option(names={"-h", "--help"}, usageHelp=true, description={"display a help message"})
    private boolean helpRequested = false;

    /** Location of the HDF5 file holding the vectors and the answer key. */
    @CommandLine.Option(names={"-f", "--hdf_file"}, required=true, description={"The HDF5 file to load"})
    private Path hdfpath;

    /** Distance function used to rank training vectors against each query vector. */
    @CommandLine.Option(names={"-d", "--distance_function"}, defaultValue="COSINE", description={"Valid values: ${COMPLETION-CANDIDATES}"})
    private DistanceFunction distanceFunction;

    /** Maximum number of neighbors kept in each computed neighborhood. */
    @CommandLine.Option(names={"-k", "--neighborhood_size"}, defaultValue="100", description={"The neighborhood size"})
    private int K;

    /**
     * Number of training vectors processed between sorts. Any value {@code <= 0}
     * means "unset"; it is then replaced on first use by
     * {@link #computeBufferLimit(int)}.
     */
    @CommandLine.Option(names={"-l", "--buffer_limit"}, defaultValue="-1", description={"The buffer size to retain between sorts by distance, selected automatically when unset as a power of ten such that 10 chunks are needed for processing each query"})
    private int buffer_limit;

    /** Selects which status views are attached; see {@link #getStatusView()}. */
    @CommandLine.Option(names={"-s", "--status"}, defaultValue="all", description={"Valid values: ${COMPLETION-CANDIDATES}"})
    private StatusMode output;

    /** When set to {@link ErrorMode#Fail}, verification stops at the first erroneous neighborhood. */
    @CommandLine.Option(names={"-e", "--error_mode"}, defaultValue="fail", description={"Valid values: ${COMPLETION-CANDIDATES}"})
    private ErrorMode errorMode;

    // NOTE(review): phi is bound from the command line but is not read anywhere in this class.
    @CommandLine.Option(names={"-p", "--phi"}, defaultValue="0.001d", description={"When comparing values which are not exact, due to floating point rounding\nerrors, the distance within which the values are considered effectively\nthe same.\n"})
    double phi;

    // Hidden option; not read anywhere in this class.
    @CommandLine.Option(names={"--_diaglevel", "-_d"}, hidden=true, description={"Internal diagnostic level, sends content directly to the console."}, defaultValue="ERROR")
    ConsoleDiagnostics diaglevel;

    /**
     * Program entry point: configures logging related system properties, runs
     * the command through picocli and exits the JVM with the resulting code.
     *
     * @param args the raw command line arguments
     */
    public static void main(String[] args) {
        // NOTE(review): the static 'logger' field above is initialized when this class
        // loads, i.e. before these properties are set. If the logging backend reads
        // "log4j.configurationFactory" only on first initialization, setting it here may
        // be too late to take effect -- confirm.
        System.setProperty("slf4j.internal.verbosity", "ERROR");
        System.setProperty("log4j.configurationFactory", CustomConfigurationFactory.class.getCanonicalName());
        logger.info("starting main");
        logger.info("instancing command");
        CMD_VerifyKNN command = new CMD_VerifyKNN();
        logger.info("instancing commandline");
        CommandLine commandLine = new CommandLine(command)
            .setCaseInsensitiveEnumValuesAllowed(true)
            .setOptionsCaseInsensitive(true);
        logger.info("executing commandline");
        int exitCode = commandLine.execute(args);
        logger.info("exiting main");
        System.exit(exitCode);
    }

    /**
     * Verifies every query index in the configured interval.
     *
     * @return {@code 0} if no comparison reported an error, otherwise {@code 2}
     * @throws Exception if reading the data, computing a neighborhood or closing
     *     one of the resources fails
     */
    @Override
    public Integer call() throws Exception {
        int errors = 0;
        try (StatusView view = this.getStatusView();
             KNNData knndata = new KNNData(new HdfFile(this.hdfpath))) {
            view.onStart(this.interval.count());
            for (long index = this.interval.min(); index < this.interval.max(); ++index) {
                LongIndexedFloatVector providedTestVector = knndata.readHdf5TestVector(index);
                view.onQueryVector(providedTestVector, index, this.interval.max());
                Neighborhood providedNeighborhood = knndata.neighborhood(providedTestVector.index());
                Neighborhood expectedNeighborhood = this.computeNeighborhood(providedTestVector, knndata, view);
                NeighborhoodComparison comparison = new NeighborhoodComparison(providedTestVector, providedNeighborhood, expectedNeighborhood);
                view.onNeighborhoodComparison(comparison);
                if (comparison.isError()) {
                    errors++;
                    if (this.errorMode == ErrorMode.Fail) {
                        // Fail-fast: stop at the first erroneous neighborhood.
                        break;
                    }
                }
            }
            view.end();
        }
        return errors > 0 ? EXIT_INCORRECT : EXIT_OK;
    }

    /**
     * Builds the status view selected by the {@code --status} option.
     *
     * <p>A Lanterna view is attached for {@code All} and {@code Progress}, and a
     * stdout view for {@code All} and {@code Stdout}. The stdout view is told
     * whether it is the only attached view.
     *
     * @return the router holding the attached views, or a no-op view if none
     *     were attached
     */
    private StatusView getStatusView() {
        StatusViewRouter router = new StatusViewRouter();
        boolean wantsLanterna = this.output == StatusMode.All || this.output == StatusMode.Progress;
        boolean wantsStdout = this.output == StatusMode.All || this.output == StatusMode.Stdout;
        if (wantsLanterna) {
            router.add(new StatusViewLanterna(Math.min(MAX_LANTERNA_SIZE, this.interval.count())));
        }
        if (wantsStdout) {
            // The flag is true only when no other view was attached before this one.
            router.add(new StatusViewStdout(router.isEmpty()));
        }
        return router.isEmpty() ? new StatusViewNoOp() : router;
    }

    /**
     * Computes the neighborhood of {@code testVector} by brute force over all
     * training vectors.
     *
     * <p>Training vectors are processed in chunks of at most {@code buffer_limit}
     * entries. After each chunk, its distances are merged with the best results
     * carried over from earlier chunks, sorted by ascending distance, and trimmed
     * to at most {@code K} entries. The result therefore holds fewer than
     * {@code K} neighbors only when fewer than {@code K} training vectors exist.
     *
     * @param testVector the query vector
     * @param data source of the training vectors
     * @param view receives an {@code onChunk} notification before each chunk is processed
     * @return the closest training vectors, ordered by ascending distance
     */
    private Neighborhood computeNeighborhood(LongIndexedFloatVector testVector, KNNData data, StatusView view) {
        int totalTrainingVectors = data.trainingVectorCount();
        if (this.buffer_limit <= 0) {
            // Unset on the command line: pick a limit once and reuse it for later queries.
            this.buffer_limit = this.computeBufferLimit(totalTrainingVectors);
        }
        int chunkLimit = this.buffer_limit;
        float[] testVecAry = testVector.vector();
        NeighborIndex[] topK = new NeighborIndex[0];
        int chunk = 0;
        while (chunk < totalTrainingVectors) {
            // Written as a subtraction so that a large limit cannot overflow int.
            int chunkSize = Math.min(chunkLimit, totalTrainingVectors - chunk);
            NeighborIndex[] candidates = new NeighborIndex[chunkSize + topK.length];
            // Best results so far go behind the slots reserved for the new chunk.
            System.arraycopy(topK, 0, candidates, chunkSize, topK.length);
            view.onChunk(chunk, chunkSize, totalTrainingVectors);
            for (int i = 0; i < chunkSize; ++i) {
                int trainOrdinal = chunk + i;
                float[] trainVector = data.train(trainOrdinal);
                double distance = this.distanceFunction.distance(testVecAry, trainVector);
                candidates[i] = new NeighborIndex(trainOrdinal, distance);
            }
            Arrays.sort(candidates, Comparator.comparingDouble(NeighborIndex::distance));
            // Keep at most K entries; there may be fewer than K candidates when the
            // chunk (or the whole training set) is smaller than K.
            topK = Arrays.copyOf(candidates, Math.min(this.K, candidates.length));
            chunk += chunkSize;
        }
        return new Neighborhood(topK);
    }

    /**
     * Selects a chunk size for {@link #computeNeighborhood}: starting at 10, the
     * limit is multiplied by ten while ten times the limit is still below
     * {@code totalTrainingVectors} and the limit is below 100000.
     *
     * @param totalTrainingVectors number of training vectors to be scanned per query
     * @return a power of ten between 10 and 100000 inclusive
     */
    private int computeBufferLimit(int totalTrainingVectors) {
        int limit = MIN_AUTO_BUFFER_LIMIT;
        while (limit * 10 < totalTrainingVectors && limit < MAX_AUTO_BUFFER_LIMIT) {
            limit *= 10;
        }
        return limit;
    }
}

