/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Tag;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.parser.lexparser.UnknownWordModel;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Pair;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Unknown-word model that scores a (word, tag) pair from a coarse word
 * "signature" and, optionally, from a Good-Turing style per-tag estimate.
 *
 * <p>A signature is the string {@code "UNK-"} followed by, depending on the
 * enabled flags, a capitalization marker, the first character, and a short
 * suffix window of the word (see {@link #getSignature(String, int)}).
 * {@link #train(Collection)} collects, per tag, the log relative frequency of
 * each signature; {@link #score(IntTaggedWord)} looks those values up.
 *
 * <p>Progress and warning messages are written to {@code System.err}.
 */
public class BaseUnknownWordModel implements UnknownWordModel {
    private static final long serialVersionUID = 6355171148751673822L;
    protected static final boolean VERBOSE = false;

    /** Whether the first character of the word is part of the signature. */
    protected boolean useFirst = false;
    /** Whether a suffix window of the word is part of the signature. */
    private final boolean useEnd;
    /** Whether the Good-Turing estimate is used for unseen signatures. */
    protected boolean useGT = false;
    /** Whether a capitalization marker ('C' / 'c') is part of the signature. */
    private final boolean useFirstCap;
    /** Length of the suffix window, taken from {@code op.unknownSuffixSize}. */
    private final int endLength;
    protected int unknownLevel;

    /** Signature key used when a word's own signature was not seen in training. */
    protected static final String unknown = "UNK";
    protected static final int nullWord = -1;
    protected static final short nullTag = -1;

    /** Counts exposed through {@link #unSeenCounter()} and updated by training and {@link #addTagging}. */
    protected ClassicCounter<IntTaggedWord> unSeenCounter = new ClassicCounter<IntTaggedWord>();
    /** Per tag: signature -> log relative frequency, filled by {@link #train(Collection)}. */
    protected HashMap<Label, ClassicCounter<String>> tagHash = new HashMap<Label, ClassicCounter<String>>();
    /** Every signature produced for a training token. */
    private final Set<String> seenEnd = new HashSet<String>();
    /** Per tag: log-probability computed by {@link #trainUnknownGT(Collection)}. */
    private final HashMap<Label, Float> unknownGT = new HashMap<Label, Float>();
    private final Lexicon lex;

    /**
     * Creates a model configured from lexicon options.
     *
     * @param op  options; {@code unknownSuffixSize} sets the suffix window length and,
     *            together with {@code useUnknownWordSignatures}, decides which
     *            signature components are enabled
     * @param lex the lexicon returned by {@link #getLexicon()}
     */
    public BaseUnknownWordModel(Options.LexOptions op, Lexicon lex) {
        this.endLength = op.unknownSuffixSize;
        this.useEnd = op.unknownSuffixSize > 0 && op.useUnknownWordSignatures > 0;
        this.useFirstCap = op.useUnknownWordSignatures > 0;
        this.lex = lex;
    }

    /**
     * Scores a tagged word; all arguments other than {@code itw} are ignored and
     * the call is forwarded to {@link #score(IntTaggedWord)}.
     */
    @Override
    public float score(IntTaggedWord itw, int loc, double c_Tseen, double total, double smooth) {
        return this.score(itw);
    }

    /**
     * Returns the stored log-probability for the word's signature under the given tag.
     *
     * <p>If no signature component is enabled, the Good-Turing value is used when
     * {@code useGT} is set; otherwise a warning is printed and negative infinity is
     * returned. If the tag has no entry in {@code tagHash}, a warning is printed and
     * negative infinity is returned.
     *
     * @param itw the word/tag pair to score
     * @return the log-probability, or {@link Float#NEGATIVE_INFINITY}
     */
    public float score(IntTaggedWord itw) {
        String word = itw.wordString();
        String tagStr = itw.tagString();
        Tag tag = new Tag(tagStr);

        boolean signaturesEnabled = this.useEnd || this.useFirst || this.useFirstCap;
        if (!signaturesEnabled) {
            if (this.useGT) {
                return this.scoreGT(tag);
            }
            System.err.println("Warning: no unknown word model in place!\nGiving the combination " + word + ' ' + tag + " zero probability.");
            return Float.NEGATIVE_INFINITY;
        }

        String end = this.getSignature(word, -1);
        boolean signatureSeen = this.seenEnd.contains(end);
        if (this.useGT && !signatureSeen) {
            return this.scoreGT(tag);
        }
        if (!signatureSeen) {
            // Fall back to the generic bucket for signatures never produced in training.
            end = unknown;
        }

        ClassicCounter<String> wordProbs = this.tagHash.get(tag);
        if (wordProbs == null) {
            System.err.println("Warning: proposed tag is unseen in training data:\t" + tag);
            return Float.NEGATIVE_INFINITY;
        }
        // A signature seen in training may still be absent for this particular tag.
        String key = wordProbs.keySet().contains(end) ? end : unknown;
        return (float) wordProbs.getCount(key);
    }

    /**
     * Not supported by this model.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public double scoreProbTagGivenWordSignature(IntTaggedWord iTW, int loc, double smooth) {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns the value stored for {@code tag} by {@link #trainUnknownGT(Collection)},
     * or {@link Float#NEGATIVE_INFINITY} when the tag has no entry.
     */
    protected float scoreGT(Label tag) {
        Float logProb = this.unknownGT.get(tag);
        return logProb != null ? logProb.floatValue() : Float.NEGATIVE_INFINITY;
    }

    /**
     * Builds the signature string for a word.
     *
     * <p>The result starts with {@code "UNK-"}; then, if enabled, {@code 'C'} or
     * {@code 'c'} depending on whether the first character is upper/title case, the
     * first character itself, and a suffix window. An empty word yields the prefix
     * plus, when the capitalization marker is enabled, {@code 'c'}; nothing else can
     * be derived from it.
     *
     * @param word the word to summarize; must not be {@code null}
     * @param loc  unused by this implementation
     * @return the signature string
     */
    @Override
    public String getSignature(String word, int loc) {
        StringBuilder signature = new StringBuilder("UNK-");
        if (word.length() == 0) {
            // There is no first character or suffix to inspect; previously this threw
            // StringIndexOutOfBoundsException.
            if (this.useFirstCap) {
                signature.append('c');
            }
            return signature.toString();
        }

        char first = word.charAt(0);
        if (this.useFirstCap) {
            boolean capitalized = Character.isUpperCase(first) || Character.isTitleCase(first);
            signature.append(capitalized ? 'C' : 'c');
        }
        if (this.useFirst) {
            signature.append(first);
        }
        if (this.useEnd) {
            // NOTE(review): the window ends at index length-1 (exclusive), so the word's
            // final character is never part of the signature. Kept as-is because stored
            // tagHash/seenEnd contents are keyed on these strings; confirm whether intended.
            int lastIndex = word.length() - 1;
            int from = Math.max(lastIndex - this.endLength, 0);
            signature.append(word.substring(from, lastIndex));
        }
        return signature.toString();
    }

    /** Always returns 0; this model does not index signatures. */
    @Override
    public int getSignatureIndex(int wordIndex, int sentencePosition) {
        return 0;
    }

    /**
     * Trains the model from a collection of trees.
     *
     * <p>Runs {@link #trainUnknownGT(Collection)}, then for every preterminal node
     * counts the word's signature under the node's tag and records the signature as
     * seen. After the first {@code Train.fractionBeforeUnseenCounting} fraction of the
     * trees, words encountered for the first time also increment
     * {@code unSeenCounter} for their tag and for the overall total. Finally the
     * per-tag signature counts, with one extra count reserved for the
     * {@code "UNK"} bucket, are converted to log relative frequencies in {@code tagHash}.
     *
     * @param trees the training trees
     */
    @Override
    public void train(Collection<Tree> trees) {
        ClassicCounter<IntTaggedWord> seenCounter = new ClassicCounter<IntTaggedWord>();
        ClassicCounter<Label> tagTotals = new ClassicCounter<Label>();

        if (this.useFirst) {
            System.err.println("Including first letter for unknown words.");
        }
        if (this.useFirstCap) {
            System.err.println("Including whether first letter is capitalized for unknown words");
        }
        if (this.useEnd) {
            System.err.println("Classing unknown word as the average of their equivalents by identity of last " + this.endLength + " letters.");
        }
        if (this.useGT) {
            System.err.println("Using Good-Turing smoothing for unknown words.");
        }

        this.trainUnknownGT(trees);

        HashMap<Label, ClassicCounter<String>> signatureCounts = new HashMap<Label, ClassicCounter<String>>();
        int treeNumber = 0;
        int indexToStartUnkCounting = (int) ((double) trees.size() * Train.fractionBeforeUnseenCounting);
        IntTaggedWord iTotal = new IntTaggedWord(nullWord, nullTag);

        for (Tree tree : trees) {
            ++treeNumber;
            for (Tree node : tree) {
                if (!node.isPreTerminal()) {
                    continue;
                }
                String word = node.firstChild().label().value();
                String signature = this.getSignature(word, -1);
                Label tag = node.label();

                ClassicCounter<String> countsForTag = signatureCounts.get(tag);
                if (countsForTag == null) {
                    countsForTag = new ClassicCounter<String>();
                    signatureCounts.put(tag, countsForTag);
                }
                countsForTag.incrementCount(signature);
                tagTotals.incrementCount(tag);
                this.seenEnd.add(signature);

                IntTaggedWord wordOnly = new IntTaggedWord(word, ".*.");
                seenCounter.incrementCount(wordOnly);
                // Only first occurrences in the later part of the data count as "unseen" events.
                if (treeNumber > indexToStartUnkCounting && seenCounter.getCount(wordOnly) < 2.0) {
                    IntTaggedWord tagOnly = new IntTaggedWord(".*.", tag.value());
                    this.unSeenCounter.incrementCount(tagOnly);
                    this.unSeenCounter.incrementCount(iTotal);
                }
            }
        }

        for (Label tag : signatureCounts.keySet()) {
            ClassicCounter<String> countsForTag = signatureCounts.get(tag);
            ClassicCounter<String> logProbs = this.tagHash.get(tag);
            if (logProbs == null) {
                logProbs = new ClassicCounter<String>();
                this.tagHash.put(tag, logProbs);
            }
            // Reserve one count for the generic "UNK" bucket in both numerator and denominator.
            tagTotals.incrementCount(tag);
            countsForTag.setCount(unknown, 1.0);
            double total = tagTotals.getCount(tag);
            for (String signature : countsForTag.keySet()) {
                logProbs.setCount(signature, Math.log(countsForTag.getCount(signature) / total));
            }
        }
    }

    /**
     * Computes the per-tag value returned by {@link #scoreGT(Label)}.
     *
     * <p>For each tag the stored value is {@code log(r1 / (N * r0))} where
     * {@code r1} is the number of (word, tag) types seen exactly once, {@code N} is
     * the tag's token count and {@code r0} is the number of seen word types that
     * never occurred with the tag. When {@code N * r0} is not positive the quotient
     * is undefined, so negative infinity is stored instead of NaN or positive infinity.
     *
     * @param trees the training trees
     */
    protected void trainUnknownGT(Collection<Tree> trees) {
        ClassicCounter<Pair<String, Label>> wtCount = new ClassicCounter<Pair<String, Label>>();
        ClassicCounter<Label> tagCount = new ClassicCounter<Label>();
        ClassicCounter<Label> r1 = new ClassicCounter<Label>();
        ClassicCounter<Label> typesPerTag = new ClassicCounter<Label>();
        HashSet<String> seenWords = new HashSet<String>();
        int tokens = 0;

        for (Tree tree : trees) {
            for (Tree node : tree) {
                if (!node.isPreTerminal()) {
                    continue;
                }
                ++tokens;
                String word = node.getChild(0).label().value();
                Label tag = node.label();
                wtCount.incrementCount(new Pair<String, Label>(word, tag));
                tagCount.incrementCount(tag);
                seenWords.add(word);
            }
        }

        System.err.println("Total tokens: " + tokens);
        System.err.println("Total WordTag types: " + wtCount.keySet().size());
        System.err.println("Total tag types: " + tagCount.keySet().size());
        System.err.println("Total word types: " + seenWords.size());

        // One pass over the (word, tag) types yields both the singleton count (r1) and
        // the number of distinct words seen with each tag.
        for (Pair<String, Label> wt : wtCount.keySet()) {
            Label tag = wt.second();
            typesPerTag.incrementCount(tag);
            if (wtCount.getCount(wt) == 1.0) {
                r1.incrementCount(tag);
            }
        }

        double wordTypes = seenWords.size();
        for (Label tag : tagCount.keySet()) {
            // Every counted (word, tag) type pairs a seen word with this tag, so the words
            // never seen with the tag are simply the remainder.
            double r0 = wordTypes - typesPerTag.getCount(tag);
            double denominator = tagCount.getCount(tag) * r0;
            float logprob = denominator > 0.0
                    ? (float) Math.log(r1.getCount(tag) / denominator)
                    : Float.NEGATIVE_INFINITY;
            this.unknownGT.put(tag, Float.valueOf(logprob));
        }
    }

    /** Returns the lexicon supplied at construction time. */
    @Override
    public Lexicon getLexicon() {
        return this.lex;
    }

    /** Returns the value last set through {@link #setUnknownLevel(int)}. */
    @Override
    public int getUnknownLevel() {
        return this.unknownLevel;
    }

    /** Stores the unknown-word level; this class only stores and returns it. */
    @Override
    public void setUnknownLevel(int unknownLevel) {
        this.unknownLevel = unknownLevel;
    }

    /**
     * Adds {@code count} to the unseen counter for {@code itw}.
     *
     * <p>If {@code seen} is true nothing is counted and a message is printed instead.
     *
     * @param seen  whether the word was seen; expected to be {@code false}
     * @param itw   the entry to increment
     * @param count the amount to add
     */
    @Override
    public void addTagging(boolean seen, IntTaggedWord itw, double count) {
        if (seen) {
            System.err.println("UWM.addTagging: Shouldn't call with seen word!");
            return;
        }
        this.unSeenCounter.incrementCount(itw, count);
    }

    /** Returns the live unseen counter (not a copy). */
    @Override
    public Counter<IntTaggedWord> unSeenCounter() {
        return this.unSeenCounter;
    }
}

