/*
 * Decompiled with CFR 0.152.
 */
package cic.ad;

import cic.ad.FeatureVector;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/**
 * Builds token-based and n-gram-based features for the URL held by a
 * {@link FeatureVector}.
 *
 * <p>A URL is split into tokens (maximal runs of word characters). Every distinct
 * token and every distinct sequential n-gram of tokens is assigned its own feature
 * index, remembered in {@link #dictionary}. Indices are laid out as follows:
 * <ul>
 *   <li>{@code startIndex}: the "num_tokens" feature (written by {@link #makeVector});</li>
 *   <li>{@code startIndex + indexNNT}: the "num_numeric_tokens" feature
 *       ({@code indexNNT} is 1 when driven by {@link #makeVector});</li>
 *   <li>{@code startIndex + numBefore + k}: the k-th dictionary entry, in order of
 *       first appearance ({@code numBefore} is 2 when driven by {@link #makeVector}).</li>
 * </ul>
 *
 * <p>Tokens and n-grams share one dictionary and one running counter, so their
 * indices interleave in the order they were first seen.
 *
 * <p>NOTE(review): {@code FeatureVector.addFeature} is assumed to take
 * (index, value, name) in that order; its definition is not visible here.
 */
public class Tokens implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Matches one token: a maximal run of word characters. Compiled once and reused. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("\\w+");

    /** Number of fixed (non-dictionary) features emitted by {@link #makeVector}. */
    private static final int NUM_FIXED_FEATURES = 2;

    /** Offset from {@code startIndex} of the "num_numeric_tokens" feature in {@link #makeVector}. */
    private static final int NUMERIC_COUNT_OFFSET = 1;

    /** N-gram size used by {@link #makeVector} and selected by passing {@code -1} as the size. */
    private static final int DEFAULT_NGRAM_SIZE = 3;

    /** Maps each known token or space-joined n-gram to its assigned feature index. */
    HashMap<String, Integer> dictionary = new HashMap<String, Integer>();
    /** First feature index used by this object (constructor argument plus one). */
    int startIndex = 0;
    /** Number of indices handed out to dictionary entries so far. */
    int dictSize = 0;
    /** Number of fixed features; set by {@link #makeVector}. */
    int numOtherFeatures = 0;
    /** Number of dictionary entries that were added as tokens. */
    int numTokenFeatures = 0;
    /** Number of dictionary entries that were added as n-grams. */
    int numNGramFeatures = 0;
    /** Last value computed by {@link #getNumFeatures()}. */
    int numFeatures = 0;

    /**
     * Creates an empty token dictionary.
     *
     * @param startIndex base index; this object's own features begin at {@code startIndex + 1}
     */
    public Tokens(int startIndex) {
        this.startIndex = startIndex + 1;
    }

    /**
     * Adds all features of this class to {@code f}: the token count, the numeric-token
     * count, one feature per known token and one feature per known 3-gram of tokens.
     *
     * @param f       feature vector whose URL is tokenised and which receives the features
     * @param addMore when {@code true}, unseen tokens and n-grams are added to the
     *                dictionary and emitted; when {@code false}, they are skipped
     */
    public void makeVector(FeatureVector f, boolean addMore) {
        List<String> tokens = Tokens.getTokens(f.getUrl());
        f.addFeature(this.startIndex, tokens.size(), "num_tokens");
        this.numOtherFeatures = NUM_FIXED_FEATURES;
        this.makeTokenFeatures(f, tokens, NUM_FIXED_FEATURES, NUMERIC_COUNT_OFFSET, addMore);
        this.makeSequentialNGram(f, tokens, DEFAULT_NGRAM_SIZE, NUM_FIXED_FEATURES, addMore);
    }

    /**
     * Emits a feature with value 1.0 for every token that has (or is given) a dictionary
     * index, then emits the count of numeric tokens.
     *
     * @param f         feature vector receiving the features
     * @param tokens    tokens to process, in order
     * @param numBefore number of fixed feature slots preceding the dictionary slots
     * @param indexNNT  offset from {@code startIndex} of the "num_numeric_tokens" feature
     * @param addMore   whether unseen tokens may be added to the dictionary
     */
    public void makeTokenFeatures(FeatureVector f, List<String> tokens, int numBefore, int indexNNT, boolean addMore) {
        int sizeBefore = this.dictSize;
        int numericTokens = 0;
        for (String token : tokens) {
            // Numeric tokens are counted whether or not they end up in the dictionary.
            if (StringUtils.isNumeric(token)) {
                ++numericTokens;
            }
            Integer index = this.lookupOrAssign(token, numBefore, addMore);
            if (index != null) {
                f.addFeature(index.intValue(), 1.0, "token_" + token);
            }
        }
        f.addFeature(this.startIndex + indexNNT, numericTokens, "num_numeric_tokens");
        // Count only entries added by this call, so n-grams added by earlier calls
        // are never mistaken for tokens.
        this.numTokenFeatures += this.dictSize - sizeBefore;
    }

    /**
     * Emits a feature with value 1.0 for every sequential n-gram of {@code tokens} that
     * has (or is given) a dictionary index.
     *
     * <p>The dictionary key is the space-joined n-gram; the feature name is
     * {@code "gram_"} followed by the n-gram with whitespace replaced by underscores,
     * regardless of whether the n-gram was already known.
     *
     * @param f         feature vector receiving the features
     * @param tokens    tokens to combine into n-grams
     * @param n         n-gram size ({@code -1} selects the default of 3)
     * @param numBefore number of fixed feature slots preceding the dictionary slots
     * @param addMore   whether unseen n-grams may be added to the dictionary
     */
    public void makeSequentialNGram(FeatureVector f, List<String> tokens, int n, int numBefore, boolean addMore) {
        int sizeBefore = this.dictSize;
        for (String gram : this.getSequentialNGram(tokens, n)) {
            Integer index = this.lookupOrAssign(gram, numBefore, addMore);
            if (index != null) {
                f.addFeature(index.intValue(), 1.0, "gram_" + gram.replaceAll("\\s", "_"));
            }
        }
        this.numNGramFeatures += this.dictSize - sizeBefore;
    }

    /**
     * Returns the dictionary index of {@code key}, assigning the next free index first
     * when the key is unknown and {@code addMore} is set.
     *
     * @return the index, or {@code null} when the key is unknown and may not be added
     */
    private Integer lookupOrAssign(String key, int numBefore, boolean addMore) {
        Integer known = this.dictionary.get(key);
        if (known != null || !addMore) {
            return known;
        }
        int assigned = this.startIndex + numBefore + this.dictSize++;
        this.dictionary.put(key, assigned);
        return Integer.valueOf(assigned);
    }

    /**
     * Recomputes and returns the total number of features: dictionary entries plus
     * fixed features.
     *
     * @return {@code dictSize + numOtherFeatures}
     */
    public int getNumFeatures() {
        this.numFeatures = this.dictSize + this.numOtherFeatures;
        return this.numFeatures;
    }

    /**
     * Splits a URL into tokens, where a token is a maximal run of word characters.
     *
     * @param url the URL to split; may be {@code null}
     * @return the tokens in order of appearance; empty when {@code url} is {@code null}
     *         or contains no word characters
     */
    public static List<String> getTokens(String url) {
        List<String> tokens = new ArrayList<String>();
        if (url == null) {
            return tokens;
        }
        Matcher m = TOKEN_PATTERN.matcher(url);
        while (m.find()) {
            tokens.add(m.group());
        }
        return tokens;
    }

    /**
     * Returns the longest token; on ties the earliest one wins.
     *
     * @param tokens tokens to scan
     * @return the longest token, or the empty string when there is no non-empty token
     */
    public static String getLongestToken(List<String> tokens) {
        String longest = "";
        for (String token : tokens) {
            // Strict comparison keeps the first token among equally long ones.
            if (token.length() > longest.length()) {
                longest = token;
            }
        }
        return longest;
    }

    /**
     * Returns the mean token length.
     *
     * @param tokens tokens to average over
     * @return the average length, or {@code 0.0} for an empty list
     */
    public static double getAverageTokenLength(List<String> tokens) {
        if (tokens.isEmpty()) {
            return 0.0;
        }
        double total = 0.0;
        for (String token : tokens) {
            total += (double) token.length();
        }
        return total / (double) tokens.size();
    }

    /**
     * Placeholder; currently does nothing.
     *
     * @param url ignored
     */
    public void getNGram(String url) {
    }

    /**
     * Builds every run of {@code n} consecutive tokens, joined by single spaces.
     *
     * @param tokens tokens to combine
     * @param n      n-gram size; {@code -1} selects the default of 3
     * @return the n-grams in order; empty when there are fewer than {@code n} tokens
     * @throws IllegalArgumentException if {@code n} is zero or negative (other than {@code -1})
     */
    public List<String> getSequentialNGram(List<String> tokens, int n) {
        if (n == -1) {
            n = DEFAULT_NGRAM_SIZE;
        }
        if (n < 1) {
            // Previously this surfaced as an IndexOutOfBoundsException from tokens.get().
            throw new IllegalArgumentException("n-gram size must be positive or -1, got " + n);
        }
        List<String> ngrams = new ArrayList<String>();
        for (int start = 0; start <= tokens.size() - n; ++start) {
            StringBuilder gram = new StringBuilder();
            for (int j = start; j < start + n; ++j) {
                if (j > start) {
                    gram.append(' ');
                }
                gram.append(tokens.get(j));
            }
            ngrams.add(gram.toString());
        }
        return ngrams;
    }

    /**
     * Prints the dictionary to standard output, one {@code index<TAB>key} line per index.
     */
    public void printAll() {
        HashMap<Integer, String> rev = new HashMap<Integer, String>();
        for (Map.Entry<String, Integer> entry : this.dictionary.entrySet()) {
            rev.put(entry.getValue(), entry.getKey());
        }
        for (Map.Entry<Integer, String> entry : rev.entrySet()) {
            System.out.println(entry.getKey() + "\t" + entry.getValue());
        }
    }

    /**
     * @return the number of indices handed out to dictionary entries so far
     */
    public int getDictSize() {
        return this.dictSize;
    }

    /**
     * @return the live (not copied) map from token or n-gram to feature index
     */
    public HashMap<String, Integer> getDictionary() {
        return this.dictionary;
    }
}

