/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import weka.core.Capabilities;
import weka.core.DictionaryBuilder;
import weka.core.Environment;
import weka.core.EnvironmentHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.OptionMetadata;
import weka.core.stemmers.NullStemmer;
import weka.core.stemmers.Stemmer;
import weka.core.stopwords.Null;
import weka.core.stopwords.StopwordsHandler;
import weka.core.tokenizers.Tokenizer;
import weka.filters.SimpleStreamFilter;
import weka.filters.UnsupervisedFilter;
import weka.gui.FilePropertyMetadata;

/**
 * Stream filter that converts string attributes into word-vector attributes
 * using a dictionary supplied by the user rather than one learned from the
 * incoming data.
 *
 * <p>All vectorization work is delegated to a {@link DictionaryBuilder}; this
 * class exposes the builder's settings as filter options and takes care of
 * loading the dictionary from an {@link InputStream}, a {@link Reader} or a
 * file before the output format is determined.
 */
public class FixedDictionaryStringToWordVector
extends SimpleStreamFilter
implements UnsupervisedFilter,
EnvironmentHandler {
    private static final long serialVersionUID = 7990892846966916757L;

    /** Performs the actual vectorization; most options are forwarded to it. */
    protected DictionaryBuilder m_vectorizer = new DictionaryBuilder();

    /** Dictionary file to load when no stream/reader source has been set. */
    protected File m_dictionaryFile = new File("-- set me --");

    /** Optional stream source for the dictionary; checked before the reader and the file. */
    protected transient InputStream m_dictionarySource;

    /** Optional reader source for the dictionary; checked before the file. */
    protected transient Reader m_textDictionarySource;

    /** Whether the dictionary file is in binary (rather than text) form. */
    protected boolean m_dictionaryIsBinary;

    /**
     * Environment used to substitute variables in the dictionary file path.
     * Transient, so it may be {@code null} on a deserialized instance.
     */
    protected transient Environment m_env = Environment.getSystemWide();

    /**
     * Returns the capabilities of this filter: everything is disabled first,
     * then all attribute types, all class types, missing values, missing class
     * values and the no-class case are enabled.
     *
     * @return the capabilities of this filter
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        result.enable(Capabilities.Capability.NO_CLASS);
        return result;
    }

    /**
     * Returns the underlying dictionary builder.
     *
     * @return the {@link DictionaryBuilder} this filter delegates to
     */
    public DictionaryBuilder getDictionaryHandler() {
        return this.m_vectorizer;
    }

    /**
     * Sets a stream to load the dictionary from. When set, it is used in
     * preference to the reader source and the dictionary file.
     *
     * @param source the stream to read the dictionary from
     */
    public void setDictionarySource(InputStream source) {
        this.m_dictionarySource = source;
    }

    /**
     * Sets a reader to load the dictionary from. When set (and no stream
     * source is set), it is used in preference to the dictionary file.
     *
     * @param source the reader to read the dictionary from
     */
    public void setDictionarySource(Reader source) {
        this.m_textDictionarySource = source;
    }

    /**
     * Sets the dictionary file to load.
     *
     * @param file the dictionary file
     */
    @OptionMetadata(displayName="Dictionary file", description="The path to the dictionary to use", commandLineParamName="dictionary", commandLineParamSynopsis="-dictionary <path to dictionary file>", displayOrder=1)
    @FilePropertyMetadata(fileChooserDialogType=0, directoriesOnly=false)
    public void setDictionaryFile(File file) {
        this.m_dictionaryFile = file;
    }

    /**
     * Returns the dictionary file.
     *
     * @return the dictionary file
     */
    public File getDictionaryFile() {
        return this.m_dictionaryFile;
    }

    /**
     * Sets whether the dictionary file holds a binary serialized dictionary.
     *
     * @param binary true if the dictionary file is binary
     */
    @OptionMetadata(displayName="Dictionary is binary", description="Dictionary file contains a binary serialized dictionary", commandLineParamName="binary-dict", commandLineParamSynopsis="-binary-dict", commandLineParamIsFlag=true, displayOrder=2)
    public void setDictionaryIsBinary(boolean binary) {
        this.m_dictionaryIsBinary = binary;
    }

    /**
     * Returns whether the dictionary file is treated as binary.
     *
     * @return true if the dictionary file is binary
     */
    public boolean getDictionaryIsBinary() {
        return this.m_dictionaryIsBinary;
    }

    /**
     * Returns the builder's "output word counts" setting.
     *
     * @return the current setting
     */
    public boolean getOutputWordCounts() {
        return this.m_vectorizer.getOutputWordCounts();
    }

    /**
     * Forwards the "output word counts" setting to the builder.
     *
     * @param outputWordCounts the new setting
     */
    @OptionMetadata(displayName="Output word counts", description="Output word counts rather than boolean 0 or 1 (indicating presence or absence of a word", commandLineParamName="C", commandLineParamSynopsis="-C", commandLineParamIsFlag=true, displayOrder=3)
    public void setOutputWordCounts(boolean outputWordCounts) {
        this.m_vectorizer.setOutputWordCounts(outputWordCounts);
    }

    /**
     * Returns the builder's attribute range specification.
     *
     * @return the attribute range string
     */
    public String getAttributeIndices() {
        return this.m_vectorizer.getAttributeIndices();
    }

    /**
     * Forwards the attribute range specification to the builder.
     *
     * @param rangeList the attribute range string
     */
    @OptionMetadata(displayName="Range of attributes to operate on", description="Specify range of attributes to act on. This is a comma separated list of attribute\nindices, with \"first\" and \"last\" valid values.", commandLineParamName="R", commandLineParamSynopsis="-R <range>", displayOrder=4)
    public void setAttributeIndices(String rangeList) {
        this.m_vectorizer.setAttributeIndices(rangeList);
    }

    /**
     * Returns the builder's "invert selection" setting.
     *
     * @return the current setting
     */
    public boolean getInvertSelection() {
        return this.m_vectorizer.getInvertSelection();
    }

    /**
     * Forwards the "invert selection" setting to the builder.
     *
     * @param invert the new setting
     */
    @OptionMetadata(displayName="Invert selection", description="Set attributes selection mode. If false, only selected attributes in the range will\nbe worked on. If true, only non-selected attributes will be processed", commandLineParamName="V", commandLineParamSynopsis="-V", commandLineParamIsFlag=true, displayOrder=5)
    public void setInvertSelection(boolean invert) {
        this.m_vectorizer.setInvertSelection(invert);
    }

    /**
     * Returns the builder's attribute name prefix.
     *
     * @return the prefix
     */
    public String getAttributeNamePrefix() {
        return this.m_vectorizer.getAttributeNamePrefix();
    }

    /**
     * Forwards the attribute name prefix to the builder.
     *
     * @param newPrefix the prefix for created attribute names
     */
    @OptionMetadata(displayName="Prefix for created attribute names", description="Specify a prefix for the created attribute names (default: \"\")", commandLineParamName="P", commandLineParamSynopsis="-P <attribute name prefix>", displayOrder=6)
    public void setAttributeNamePrefix(String newPrefix) {
        this.m_vectorizer.setAttributeNamePrefix(newPrefix);
    }

    /**
     * Returns the builder's TF transform setting.
     *
     * @return the current setting
     */
    public boolean getTFTransform() {
        return this.m_vectorizer.getTFTransform();
    }

    /**
     * Forwards the TF transform setting to the builder.
     *
     * @param TFTransform the new setting
     */
    // NOTE(review): unlike the other boolean options in this class, this
    // annotation does not set commandLineParamIsFlag=true although the
    // synopsis shows a bare "-T" - confirm this is intended.
    @OptionMetadata(displayName="TFT transform", description="Set whether the word frequencies should be transformed into\nlog(1+fij), where fij is the frequency of word i in document (instance) j.", commandLineParamName="T", commandLineParamSynopsis="-T", displayOrder=7)
    public void setTFTransform(boolean TFTransform) {
        this.m_vectorizer.setTFTransform(TFTransform);
    }

    /**
     * Returns the builder's IDF transform setting.
     *
     * @return the current setting
     */
    public boolean getIDFTransform() {
        return this.m_vectorizer.getIDFTransform();
    }

    /**
     * Forwards the IDF transform setting to the builder.
     *
     * @param IDFTransform the new setting
     */
    // NOTE(review): as with "-T", commandLineParamIsFlag is not set here
    // although the synopsis shows a bare "-I" - confirm this is intended.
    @OptionMetadata(displayName="IDF transform", description="Set whether the word frequencies in a document should be transformed into\nfij*log(num of Docs/num of docs with word i), where fij is the frequency\nof word i in document (instance) j.", commandLineParamName="I", commandLineParamSynopsis="-I", displayOrder=8)
    public void setIDFTransform(boolean IDFTransform) {
        this.m_vectorizer.setIDFTransform(IDFTransform);
    }

    /**
     * Forwards the normalization setting to the builder.
     *
     * @param normalize the new setting
     */
    @OptionMetadata(displayName="Normalize word frequencies", description="Whether to normalize to average length of documents seen during dictionary construction", commandLineParamName="N", commandLineParamSynopsis="-N", commandLineParamIsFlag=true, displayOrder=9)
    public void setNormalizeDocLength(boolean normalize) {
        this.m_vectorizer.setNormalize(normalize);
    }

    /**
     * Returns the builder's normalization setting.
     *
     * @return the current setting
     */
    public boolean getNormalizeDocLength() {
        return this.m_vectorizer.getNormalize();
    }

    /**
     * Returns the builder's "lower case tokens" setting.
     *
     * @return the current setting
     */
    public boolean getLowerCaseTokens() {
        return this.m_vectorizer.getLowerCaseTokens();
    }

    /**
     * Forwards the "lower case tokens" setting to the builder.
     *
     * @param downCaseTokens the new setting
     */
    @OptionMetadata(displayName="Lower case tokens", description="Convert all tokens to lowercase when matching against dictionary entries.", commandLineParamName="L", commandLineParamSynopsis="-L", commandLineParamIsFlag=true, displayOrder=10)
    public void setLowerCaseTokens(boolean downCaseTokens) {
        this.m_vectorizer.setLowerCaseTokens(downCaseTokens);
    }

    /**
     * Sets the stemmer on the builder. A {@code null} value is replaced by a
     * {@link NullStemmer}.
     *
     * @param value the stemmer to use, or {@code null} for a {@link NullStemmer}
     */
    @OptionMetadata(displayName="Stemmer to use", description="The stemming algorithm (classname plus parameters) to use.", commandLineParamName="stemmer", commandLineParamSynopsis="-stemmer <spec>", displayOrder=11)
    public void setStemmer(Stemmer value) {
        if (value != null) {
            this.m_vectorizer.setStemmer(value);
        } else {
            this.m_vectorizer.setStemmer(new NullStemmer());
        }
    }

    /**
     * Returns the builder's stemmer.
     *
     * @return the stemmer in use
     */
    public Stemmer getStemmer() {
        return this.m_vectorizer.getStemmer();
    }

    /**
     * Sets the stopwords handler on the builder. A {@code null} value is
     * replaced by a {@link Null} handler.
     *
     * @param value the handler to use, or {@code null} for a {@link Null} handler
     */
    @OptionMetadata(displayName="Stop words handler", description="The stopwords handler to use (default = Null)", commandLineParamName="stopwords-handler", commandLineParamSynopsis="-stopwords-handler <spec>", displayOrder=12)
    public void setStopwordsHandler(StopwordsHandler value) {
        if (value != null) {
            this.m_vectorizer.setStopwordsHandler(value);
        } else {
            this.m_vectorizer.setStopwordsHandler(new Null());
        }
    }

    /**
     * Returns the builder's stopwords handler.
     *
     * @return the stopwords handler in use
     */
    public StopwordsHandler getStopwordsHandler() {
        return this.m_vectorizer.getStopwordsHandler();
    }

    /**
     * Forwards the tokenizer to the builder. The value is passed through
     * as-is, without the null substitution done for stemmers and stopwords
     * handlers.
     *
     * @param value the tokenizer to use
     */
    @OptionMetadata(displayName="Tokenizer", description="The tokenizing algorithm (classname plus parameters) to use.\n(default: weka.core.tokenizers.WordTokenizer)", commandLineParamName="tokenizer", commandLineParamSynopsis="-tokenizer <spec>", displayOrder=13)
    public void setTokenizer(Tokenizer value) {
        this.m_vectorizer.setTokenizer(value);
    }

    /**
     * Returns the builder's tokenizer.
     *
     * @return the tokenizer in use
     */
    public Tokenizer getTokenizer() {
        return this.m_vectorizer.getTokenizer();
    }

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    @Override
    public String globalInfo() {
        return "Converts String attributes into a set of attributes representing word occurrence (depending on the tokenizer) information from the text contained in the strings. The set of words (attributes) is taken from a user-supplied dictionary, either in plain text form or as a serialized java object.";
    }

    /**
     * Determines the output format, loading the dictionary if necessary.
     *
     * <p>If the builder is already ready to vectorize and the incoming header
     * equals the one it was set up with, its existing vectorized format is
     * returned. Otherwise the builder is reset, set up with the new header and
     * the dictionary is loaded from the first available source, in the order:
     * stream source, reader source, dictionary file.
     *
     * @param inputFormat the header of the incoming data
     * @return the vectorized output format reported by the builder
     * @throws Exception if no dictionary source is available, the dictionary
     *         file path is empty or does not exist, or the builder fails
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
        if (this.m_vectorizer.readyToVectorize() && inputFormat.equalHeaders(this.m_vectorizer.getInputFormat())) {
            return this.m_vectorizer.getVectorizedFormat();
        }
        this.m_vectorizer.reset();
        this.m_vectorizer.setup(inputFormat);
        if (this.m_dictionaryFile == null && this.m_dictionarySource == null && this.m_textDictionarySource == null) {
            throw new IOException("No dictionary file/source specified!");
        }
        if (this.m_dictionarySource != null) {
            this.m_vectorizer.loadDictionary(this.m_dictionarySource);
        } else if (this.m_textDictionarySource != null) {
            this.m_vectorizer.loadDictionary(this.m_textDictionarySource);
        } else {
            this.loadDictionaryFromFile();
        }
        return this.m_vectorizer.getVectorizedFormat();
    }

    /**
     * Loads the dictionary from {@link #m_dictionaryFile} after substituting
     * environment variables in its path.
     *
     * @throws Exception if the path is empty, the file does not exist, or the
     *         builder fails to load it
     */
    private void loadDictionaryFromFile() throws Exception {
        String fString = this.m_dictionaryFile.toString();
        if (fString.length() == 0) {
            throw new IOException("No dictionary file specified!");
        }
        // m_env is transient and may also be set to null by a caller; fall back
        // to the system-wide environment so substitution is not silently
        // skipped because of a NullPointerException.
        Environment env = this.m_env != null ? this.m_env : Environment.getSystemWide();
        try {
            fString = env.substitute(fString);
        }
        catch (Exception ignored) {
            // Substitution is best-effort: on failure the path is used as given
            // and the existence check below reports any resulting problem.
        }
        File dictFile = new File(fString);
        if (!dictFile.exists()) {
            throw new IOException("Specified dictionary file '" + fString + "' does not seem to exist!");
        }
        // Second argument is true when the dictionary is NOT binary.
        this.m_vectorizer.loadDictionary(dictFile, !this.m_dictionaryIsBinary);
    }

    /**
     * Vectorizes a single instance via the builder.
     *
     * @param instance the instance to convert
     * @return the vectorized instance produced by the builder
     * @throws Exception if the builder fails
     */
    @Override
    protected Instance process(Instance instance) throws Exception {
        return this.m_vectorizer.vectorizeInstance(instance);
    }

    /**
     * Sets the environment used for variable substitution in the dictionary
     * file path.
     *
     * @param env the environment to use
     */
    @Override
    public void setEnvironment(Environment env) {
        this.m_env = env;
    }

    /**
     * Runs the filter from the command line.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        FixedDictionaryStringToWordVector.runFilter(new FixedDictionaryStringToWordVector(), args);
    }
}

