/*
 * Decompiled with CFR 0.152.
 */
package io.nosqlbench.nbvectors.datasource.parquet.layout;

import java.io.IOException;
import java.nio.file.FileVisitOption;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Walks one or more file trees and groups the {@code .parquet} files found into named bins.
 *
 * <p>The bin name of a file is taken from the named capture group {@code bin} of a binning
 * pattern which is matched against the whole path string. With the default pattern the bin
 * is the name of the directory which directly contains the file.
 *
 * <p>The walk is performed eagerly by the constructor, which passes this instance to
 * {@link Files#walkFileTree}; afterwards {@link #getBins()} returns what was collected.
 * All binned files must have the same number of path name elements.
 */
public class PathBinning extends SimpleFileVisitor<Path> {
    /**
     * Default binning pattern, written in extended (comments) mode: an optional leading
     * directory portion whose last element is captured as {@code bin}, followed by the
     * file name captured as {@code file}. Both {@code /} and {@code \} count as separators.
     */
    private static final Pattern DEFAULT_BINNING_PATTERN = Pattern.compile(
        "(?x)                      # Enable extended mode for comments and whitespace\n"
            + "^                         # Start of string\n"
            + "(?:\n"
            + "  .*[\\\\\\\\/]               # Greedily match up to the last directory separator\n"
            + "  (?<bin>[^\\\\\\\\/]+)    # Capture only the last directory name (parent directory)\n"
            + "  [\\\\\\\\/]                 # Directory separator after the parent directory\n"
            + ")?                        # End of optional parent directory group\n"
            + "(?<file>[^\\\\\\\\/]+)        # Capture the file name (non-separator characters)\n"
            + "$\n");

    /** Pattern matched against each visited path string; must define the group {@code bin}. */
    private final Pattern regex;

    /** Name count of the first binned file; 0 until the first {@code .parquet} file is seen. */
    private int componentDepth = 0;

    /** Bin name to the files collected for it, in the order in which bins were first seen. */
    private final Map<String, List<Path>> bins = new LinkedHashMap<>();

    /**
     * Returns the bins collected while walking the file trees.
     *
     * @return a new {@link BinningResult} holding a copy of the bin map; the per-bin lists
     *     are the same list instances held by this object, not copies
     */
    public BinningResult getBins() {
        return new BinningResult(this.bins);
    }

    /**
     * Bins all {@code .parquet} files under the given paths using the default pattern.
     *
     * @param paths the files or directories to walk
     * @throws RuntimeException if walking a path fails with an {@link IOException}
     * @throws IllegalStateException if files at different depths are found, or if a matched
     *     path yields no bin
     */
    public PathBinning(List<Path> paths) {
        this(paths, DEFAULT_BINNING_PATTERN);
    }

    /**
     * Bins all {@code .parquet} files under the given paths using a custom pattern.
     *
     * <p>Each path is walked with symbolic links followed, to a maximum depth of 100.
     *
     * @param paths the files or directories to walk
     * @param binningPattern the pattern to match against each path string; its source must
     *     contain a named capture group {@code bin}
     * @throws IllegalArgumentException if the pattern source does not contain {@code ?<bin>}
     * @throws RuntimeException if walking a path fails with an {@link IOException}
     * @throws IllegalStateException if files at different depths are found, or if a matched
     *     path yields no bin
     */
    public PathBinning(List<Path> paths, Pattern binningPattern) {
        this.regex = binningPattern;
        // Checked up front so that a bad pattern fails before any file system access.
        if (!this.regex.pattern().contains("?<bin>")) {
            throw new IllegalArgumentException("Invalid binning pattern. There must be a named capture group bin, as in ...?<bin>...");
        }
        for (Path path : paths) {
            try {
                Files.walkFileTree(path, Set.of(FileVisitOption.FOLLOW_LINKS), 100, this);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Adds a visited {@code .parquet} file to its bin.
     *
     * <p>Files whose path string does not end in {@code .parquet} are reported on standard
     * output and skipped. Files whose path string does not match the binning pattern are
     * left out of the result without any message.
     *
     * @param path the visited file
     * @param attrs the basic attributes of the file
     * @return the result of {@link SimpleFileVisitor#visitFile}
     * @throws IOException declared by the overridden method
     * @throws IllegalStateException if this file has a different number of name elements than
     *     the first binned file, or if the pattern matched without capturing a bin
     */
    @Override
    public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) throws IOException {
        String pathString = path.toString();
        if (!pathString.endsWith(".parquet")) {
            System.out.println("skipped:" + pathString);
            return super.visitFile(path, attrs);
        }

        // The first binned file fixes the depth which every later file must share.
        int nameCount = path.getNameCount();
        if (this.componentDepth == 0) {
            this.componentDepth = nameCount;
        }
        if (this.componentDepth != nameCount) {
            throw new IllegalStateException("Binning files at different depths would produce undefined results.");
        }

        Matcher matcher = this.regex.matcher(pathString);
        if (matcher.matches()) {
            String bin = matcher.group("bin");
            // The bin group may be optional in the pattern, e.g. for a bare file name
            // without any directory portion; such a file cannot be assigned to a bin.
            if (bin == null) {
                throw new IllegalStateException("bin pattern failed to match against " + pathString);
            }
            this.bins.computeIfAbsent(bin, b -> new ArrayList<>()).add(path);
        }
        return super.visitFile(path, attrs);
    }

    /**
     * The outcome of a binning run: bin name to the files assigned to that bin.
     */
    public static class BinningResult extends LinkedHashMap<String, List<Path>> {
        /**
         * Creates a result holding the mappings of the given map.
         *
         * @param m the bin map to copy mappings from
         */
        public BinningResult(Map<? extends String, ? extends List<Path>> m) {
            super(m);
        }

        /**
         * Regroups every file of every bin by its parent directory.
         *
         * <p>If a path has no parent of its own, the parent of its absolute form is used.
         *
         * @return a new map from parent directory to the files directly inside it, in the
         *     order in which parents were first encountered
         * @throws RuntimeException if no parent can be determined for a path
         */
        public Map<Path, List<Path>> toParentGroups() {
            Map<Path, List<Path>> results = new LinkedHashMap<>();
            for (List<Path> files : this.values()) {
                for (Path file : files) {
                    Path parent = file.getParent();
                    if (parent == null) {
                        parent = file.toAbsolutePath().getParent();
                    }
                    if (parent == null) {
                        throw new RuntimeException("unable to resolve parent of path " + file);
                    }
                    results.computeIfAbsent(parent, n -> new ArrayList<>()).add(file);
                }
            }
            return results;
        }
    }
}

