package yamSS.simlib.ext;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/* loaded from: input_file:yamSS/simlib/ext/TokensIndexer.class */
/**
 * Indexes the tokens of a list of labels and assigns each distinct term a weight.
 *
 * <p>Every label is split by the tokenizer; each token is lower-cased and, unless the
 * stop-word filter contains it, passed through the stemmer. The weight of a term is
 * {@code ln(numberOfLabels / occurrences)}, where {@code occurrences} counts every
 * appearance of the term across all labels (repeats inside a single label included).
 * A weight can therefore be zero or negative when a term occurs at least as often as
 * there are labels.
 *
 * <p>{@code minWeight} and {@code maxWeight} track the smallest and largest weight among
 * terms that are not stop words. They keep their initial sentinel values
 * ({@code Double.MAX_VALUE} and {@code -Double.MAX_VALUE}) when no such term was indexed.
 */
public class TokensIndexer {
    /** Weight reported for a term that is not present in the index. */
    private static final double UNKNOWN_TERM_WEIGHT = 1.0d;
    /** Slope of the sigmoid applied by {@link #normalizeWeight()}. */
    private static final double SIGMOID_STEEPNESS = 10.0d;
    /** Input value at which the sigmoid applied by {@link #normalizeWeight()} yields 0.5. */
    private static final double SIGMOID_MIDPOINT = 0.5d;

    List<String> labelList;
    double minWeight = Double.MAX_VALUE;
    // Double.MIN_VALUE is the smallest POSITIVE double, so it cannot serve as a
    // "lower than everything" seed; weights may be zero or negative.
    double maxWeight = -Double.MAX_VALUE;
    Map<String, Double> termWeightMap = new HashMap<String, Double>();
    LabelTokenizer tokenizer = new LabelTokenizer();
    PorterStemmer stemmer = new PorterStemmer();
    StopWords filter = StopWords.getMediumSet();

    /**
     * Creates an indexer and, when the list is non-empty, indexes it immediately.
     *
     * @param list labels to index; the reference is stored as-is (not copied).
     *             A {@code null} or empty list leaves the index empty.
     */
    public TokensIndexer(List<String> list) {
        this.labelList = list;
        if (list == null || list.isEmpty()) {
            return;
        }
        indexing();
    }

    /**
     * @return the smallest weight among indexed non-stop-word terms, or
     *         {@code Double.MAX_VALUE} if none was indexed
     */
    public double getMinWeight() {
        return this.minWeight;
    }

    /** @return the label list passed to the constructor (same reference, may be {@code null}) */
    public List<String> getLabelList() {
        return this.labelList;
    }

    /** @return the live term-to-weight map backing this indexer (not a copy) */
    public Map<String, Double> getTermWeightMap() {
        return this.termWeightMap;
    }

    /**
     * Splits a label into tokens.
     *
     * @param str label to tokenize
     * @return a new list with the tokenizer's tokens in order; empty when the tokenizer
     *         returns {@code null}
     */
    public List<String> tokenize(String str) {
        List<String> tokens = this.tokenizer.tokenize(str);
        if (tokens == null) {
            return new ArrayList<String>();
        }
        return new ArrayList<String>(tokens);
    }

    /**
     * Splits a label into tokens, optionally dropping stop words.
     *
     * @param str label to tokenize
     * @param z   when {@code true}, tokens contained in the stop-word filter are omitted;
     *            tokens are tested exactly as the tokenizer produced them
     * @return a new list of the retained tokens; empty when the tokenizer returns {@code null}
     */
    public List<String> tokenize(String str, boolean z) {
        List<String> kept = new ArrayList<String>();
        List<String> tokens = this.tokenizer.tokenize(str);
        if (tokens == null) {
            return kept;
        }
        for (String token : tokens) {
            if (!z || !this.filter.contains(token)) {
                kept.add(token);
            }
        }
        return kept;
    }

    /**
     * Builds the term-weight map from {@code labelList}: first counts the occurrences of
     * each normalized term, then replaces every count with
     * {@code ln(labelCount / count)} and updates {@code minWeight}/{@code maxWeight}
     * from the terms that are not stop words.
     */
    void indexing() {
        // Pass 1: occurrence count per normalized term.
        for (String label : this.labelList) {
            List<String> tokens = this.tokenizer.tokenize(label);
            if (tokens == null) {
                continue;
            }
            for (String token : tokens) {
                String term = normalizeTerm(token);
                Double count = this.termWeightMap.get(term);
                double updated = (count == null) ? 1.0d : count.doubleValue() + 1.0d;
                this.termWeightMap.put(term, Double.valueOf(updated));
            }
        }

        // Pass 2: turn counts into log weights and record the extremes.
        int labelCount = this.labelList.size();
        for (Map.Entry<String, Double> entry : this.termWeightMap.entrySet()) {
            double weight = Math.log(labelCount / entry.getValue().doubleValue());
            entry.setValue(Double.valueOf(weight));
            if (this.filter.contains(entry.getKey())) {
                // Stop words keep their weight but do not influence min/max.
                continue;
            }
            if (weight < this.minWeight) {
                this.minWeight = weight;
            }
            if (weight > this.maxWeight) {
                this.maxWeight = weight;
            }
        }
    }

    /**
     * Rescales every stored weight in place: the weight is divided by {@code maxWeight}
     * and, unless the ratio is exactly 1, passed through a sigmoid with steepness 10 and
     * midpoint 0.5.
     *
     * <p>{@code minWeight} and {@code maxWeight} themselves are not rescaled, and calling
     * this method again rescales the already-normalized values.
     */
    public void normalizeWeight() {
        // Without a positive maximum, dividing by it would give NaN (0/0) or flip the
        // ordering (negative divisor). The smallest positive double keeps the sign of
        // each weight and reproduces the results of the former MIN_VALUE-seeded maximum.
        double divisor = (this.maxWeight > 0.0d) ? this.maxWeight : Double.MIN_VALUE;
        for (Map.Entry<String, Double> entry : this.termWeightMap.entrySet()) {
            double ratio = entry.getValue().doubleValue() / divisor;
            if (ratio != 1.0d) {
                ratio = sigmoid(ratio, SIGMOID_STEEPNESS, SIGMOID_MIDPOINT);
            }
            entry.setValue(Double.valueOf(ratio));
        }
    }

    /** Removes every entry from the term-weight map; min/max weights are left untouched. */
    public void clear() {
        this.termWeightMap.clear();
    }

    /**
     * Looks up the weight of a single token.
     *
     * @param str token to look up; it is normalized the same way as during indexing
     * @return the stored weight, or {@code 1.0} when the term is not in the index
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public double getTermWeight(String str) {
        Double weight = this.termWeightMap.get(normalizeTerm(str));
        return (weight == null) ? UNKNOWN_TERM_WEIGHT : weight.doubleValue();
    }

    /**
     * Looks up the weight of a single token, optionally damping stop words.
     *
     * @param str token to look up; it is normalized the same way as during indexing
     * @param z   when {@code true} and the token is a stop word, the result is capped at
     *            half of {@code minWeight}
     * @return the (possibly capped) stored weight, or {@code 1.0} when the term is not in
     *         the index
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public double getTermWeight(String str, boolean z) {
        String lowered = str.toLowerCase(Locale.ROOT);
        // Decide stop-word status once, on the lower-cased form, so that the lookup key
        // and the damping decision always agree regardless of the caller's casing.
        boolean stopWord = this.filter.contains(lowered);
        String term = stopWord ? lowered : this.stemmer.stem(lowered);

        Double stored = this.termWeightMap.get(term);
        if (stored == null) {
            return UNKNOWN_TERM_WEIGHT;
        }
        double weight = stored.doubleValue();
        if (z && stopWord) {
            weight = Math.min(weight, this.minWeight / 2.0d);
        }
        return weight;
    }

    /**
     * Returns the weight vector of the label at the given position of the label list.
     *
     * @param i index into the label list
     * @return one weight per token, or {@code null} when the tokenizer returns {@code null}
     */
    public List<Double> getLabelVector(int i) {
        return getLabelVector(this.labelList.get(i));
    }

    /**
     * Returns the weights of a label's tokens, in token order.
     *
     * @param str label to tokenize and weigh
     * @return one weight per token, or {@code null} when the tokenizer returns {@code null}
     */
    public List<Double> getLabelVector(String str) {
        List<String> tokens = this.tokenizer.tokenize(str);
        if (tokens == null) {
            return null;
        }
        List<Double> weights = new ArrayList<Double>(tokens.size());
        for (String token : tokens) {
            weights.add(Double.valueOf(getTermWeight(token)));
        }
        return weights;
    }

    /**
     * Returns the weights of a label's tokens, in token order, optionally damping stop words.
     *
     * @param str label to tokenize and weigh
     * @param z   forwarded to {@link #getTermWeight(String, boolean)} for every token
     * @return one weight per token, or {@code null} when the tokenizer returns {@code null}
     */
    public List<Double> getLabelVector(String str, boolean z) {
        List<String> tokens = this.tokenizer.tokenize(str);
        if (tokens == null) {
            return null;
        }
        List<Double> weights = new ArrayList<Double>(tokens.size());
        for (String token : tokens) {
            weights.add(Double.valueOf(getTermWeight(token, z)));
        }
        return weights;
    }

    /** Prints every term and its weight to standard output, one tab-separated pair per line. */
    public void printOut() {
        for (Map.Entry<String, Double> entry : this.termWeightMap.entrySet()) {
            System.out.println(entry.getKey() + "\t:\t" + entry.getValue().doubleValue());
        }
    }

    /**
     * Logistic function {@code 1 / (1 + e^(-d2 * (d - d3)))}.
     *
     * @param d  input value
     * @param d2 steepness
     * @param d3 midpoint (input at which the result is 0.5)
     * @return a value in the range 0..1
     */
    double sigmoid(double d, double d2, double d3) {
        return 1.0d / (1.0d + Math.exp((-d2) * (d - d3)));
    }

    /**
     * Lower-cases a token (locale-independently) and stems it unless it is a stop word.
     * This is the key form used by the term-weight map.
     */
    private String normalizeTerm(String token) {
        String lowered = token.toLowerCase(Locale.ROOT);
        return this.filter.contains(lowered) ? lowered : this.stemmer.stem(lowered);
    }

    /** Small command-line demonstration of indexing, weight lookup and tokenization. */
    public static void main(String[] strArr) {
        String[] labels = {"Yahoo Research", "Microsoft Research", "IBM Research", "Google Labs", "Bell Labs", "NEC Research Labs"};
        TokensIndexer tokensIndexer = new TokensIndexer(Arrays.asList(labels));
        System.out.println("weight of term [Reseach] = " + tokensIndexer.getTermWeight("Research"));
        System.out.println("weight of term [Microsoft] = " + tokensIndexer.getTermWeight("Microsoft"));
        System.out.println("weight of term [Labs] = " + tokensIndexer.getTermWeight("Labs"));
        System.out.println("Weight Vector of : " + labels[2] + "is : ");
        List<Double> vector = tokensIndexer.getLabelVector(2);
        if (vector != null) {
            for (Double weight : vector) {
                System.out.print(weight.doubleValue() + " ");
            }
        }
        System.out.println();
        System.out.println("-------------------------------------------");
        for (String token : new TokensIndexer(Arrays.asList(labels)).tokenize("has_an_email", true)) {
            System.out.println(token);
        }
    }
}
