/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.postag;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import opennlp.maxent.DataStream;
import opennlp.maxent.Evalable;
import opennlp.maxent.EventCollector;
import opennlp.maxent.EventStream;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.maxent.MaxentModel;
import opennlp.maxent.PlainTextByLineDataStream;
import opennlp.maxent.TwoPassDataIndexer;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ngram.NGramModel;
import opennlp.tools.ngram.Token;
import opennlp.tools.ngram.TokenList;
import opennlp.tools.postag.DefaultPOSContextGenerator;
import opennlp.tools.postag.POSContextGenerator;
import opennlp.tools.postag.POSEventCollector;
import opennlp.tools.postag.POSEventStream;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.postag.TagDictionary;
import opennlp.tools.util.BeamSearch;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.Pair;
import opennlp.tools.util.Sequence;

/**
 * Maximum-entropy part-of-speech tagger.
 *
 * <p>Tagging is delegated to a {@link BeamSearch} that is driven by a
 * {@link MaxentModel} and a {@link POSContextGenerator}. When a
 * {@link TagDictionary} is supplied, the search only accepts tags that the
 * dictionary lists for a word (words unknown to the dictionary are not
 * restricted).
 *
 * <p>Instances keep the most recent best sequence in a field so that
 * {@link #probs()} can be queried after {@link #tag(List)} or
 * {@link #tag(String[])}; because of that shared field, concurrent use of one
 * instance will interleave results.
 */
public class POSTaggerME
implements Evalable,
POSTagger {
    /** Model that scores tag outcomes for a given context. */
    protected MaxentModel posModel;
    /** Produces the model contexts for each token position. */
    protected POSContextGenerator contextGen;
    /** Optional word-to-allowed-tags dictionary; {@code null} means unrestricted. */
    protected TagDictionary tagDictionary;
    /** Never assigned in this class; kept for subclasses. */
    protected Dictionary ngramDictionary;
    /** Never read in this class; kept for subclasses. */
    protected boolean useClosedClassTagsFilter = false;
    /** Beam width used by the constructors that do not take an explicit size. */
    private static final int DEFAULT_BEAM_SIZE = 3;
    /** Event cutoff used when none is given on the command line or to {@link #train(EventStream, File)}. */
    private static final int DEFAULT_CUTOFF = 5;
    /** Training iterations used when none are given on the command line or to {@link #train(EventStream, File)}. */
    private static final int DEFAULT_ITERATIONS = 100;
    /** Beam width this tagger was constructed with. */
    protected int size;
    /** Result of the most recent {@link #tag(List)} / {@link #tag(String[])} call. */
    private Sequence bestSequence;
    /** Search used to decode tag sequences. */
    protected BeamSearch beam;

    /**
     * Creates a tagger with the default context generator (built without an
     * n-gram dictionary) and the given tag dictionary.
     *
     * @param model   the tagging model
     * @param tagdict tag dictionary restricting the tags per word, may be {@code null}
     */
    public POSTaggerME(MaxentModel model, TagDictionary tagdict) {
        this(model, new DefaultPOSContextGenerator(null), tagdict);
    }

    /**
     * Creates a tagger whose default context generator is built from
     * {@code dict}; no tag dictionary is used.
     *
     * @param model the tagging model
     * @param dict  dictionary handed to {@link DefaultPOSContextGenerator}
     */
    public POSTaggerME(MaxentModel model, Dictionary dict) {
        this(model, new DefaultPOSContextGenerator(dict));
    }

    /**
     * Creates a tagger whose default context generator is built from
     * {@code dict} and whose search is restricted by {@code tagdict}.
     *
     * @param model   the tagging model
     * @param dict    dictionary handed to {@link DefaultPOSContextGenerator}
     * @param tagdict tag dictionary restricting the tags per word, may be {@code null}
     */
    public POSTaggerME(MaxentModel model, Dictionary dict, TagDictionary tagdict) {
        this(DEFAULT_BEAM_SIZE, model, new DefaultPOSContextGenerator(dict), tagdict);
    }

    /**
     * Creates a tagger with a caller-supplied context generator and no tag
     * dictionary.
     *
     * @param model the tagging model
     * @param cg    the context generator
     */
    public POSTaggerME(MaxentModel model, POSContextGenerator cg) {
        this(DEFAULT_BEAM_SIZE, model, cg, null);
    }

    /**
     * Creates a tagger with a caller-supplied context generator and tag
     * dictionary, using the default beam size.
     *
     * @param model   the tagging model
     * @param cg      the context generator
     * @param tagdict tag dictionary restricting the tags per word, may be {@code null}
     */
    public POSTaggerME(MaxentModel model, POSContextGenerator cg, TagDictionary tagdict) {
        this(DEFAULT_BEAM_SIZE, model, cg, tagdict);
    }

    /**
     * Creates a fully configured tagger.
     *
     * @param beamSize width of the beam search
     * @param model    the tagging model
     * @param cg       the context generator
     * @param tagdict  tag dictionary restricting the tags per word, may be {@code null}
     */
    public POSTaggerME(int beamSize, MaxentModel model, POSContextGenerator cg, TagDictionary tagdict) {
        this.size = beamSize;
        this.posModel = model;
        this.contextGen = cg;
        this.beam = new PosBeamSearch(this.size, cg, model);
        this.tagDictionary = tagdict;
    }

    /**
     * Returns the negative outcome for evaluation; this tagger has none and
     * always returns the empty string.
     */
    public String getNegativeOutcome() {
        return "";
    }

    /**
     * Returns the number of outcomes (tags) known to the current model.
     */
    public int getNumTags() {
        return this.posModel.getNumOutcomes();
    }

    /**
     * Creates an event collector over {@code r} that uses this tagger's
     * context generator.
     *
     * @param r source of annotated text
     */
    public EventCollector getEventCollector(Reader r) {
        return new POSEventCollector(r, this.contextGen);
    }

    /**
     * Tags a tokenized sentence and remembers the winning sequence for
     * {@link #probs()}.
     *
     * @param sentence the tokens of one sentence
     * @return the outcomes of the best sequence found by the beam search
     */
    public List tag(List sentence) {
        this.bestSequence = this.beam.bestSequence(sentence, null);
        return this.bestSequence.getOutcomes();
    }

    /**
     * Tags a tokenized sentence and remembers the winning sequence for
     * {@link #probs()}.
     *
     * @param sentence the tokens of one sentence
     * @return the outcomes of the best sequence, as an array
     */
    public String[] tag(String[] sentence) {
        this.bestSequence = this.beam.bestSequence(sentence, null);
        List outcomes = this.bestSequence.getOutcomes();
        // Raw List#toArray(T[]) is typed Object[]; the runtime array is a String[].
        return (String[]) outcomes.toArray(new String[outcomes.size()]);
    }

    /**
     * Returns the outcomes of several candidate taggings for one sentence.
     * Unlike the single-result {@code tag} methods this does not update the
     * sequence that {@link #probs()} reports on.
     *
     * @param numTaggings number of sequences requested from the beam search
     * @param sentence    the tokens of one sentence
     * @return one tag array per sequence returned by the search
     */
    public String[][] tag(int numTaggings, String[] sentence) {
        Sequence[] bestSequences = this.beam.bestSequences(numTaggings, sentence, null);
        String[][] tags = new String[bestSequences.length][];
        for (int si = 0; si < tags.length; ++si) {
            List outcomes = bestSequences[si].getOutcomes();
            tags[si] = (String[]) outcomes.toArray(new String[outcomes.size()]);
        }
        return tags;
    }

    /**
     * Copies the probabilities of the most recent best sequence into
     * {@code probs}. Requires a prior call to {@link #tag(List)} or
     * {@link #tag(String[])}; otherwise there is no sequence and a
     * {@link NullPointerException} results.
     *
     * @param probs destination array, filled by {@link Sequence#getProbs(double[])}
     */
    public void probs(double[] probs) {
        this.bestSequence.getProbs(probs);
    }

    /**
     * Returns the probabilities of the most recent best sequence. Requires a
     * prior call to {@link #tag(List)} or {@link #tag(String[])}.
     */
    public double[] probs() {
        return this.bestSequence.getProbs();
    }

    /**
     * Splits {@code sentence} on whitespace, tags the tokens and renders the
     * result as space-separated {@code token/tag} pairs.
     *
     * @param sentence whitespace-delimited text
     * @return the tagged sentence, without leading or trailing whitespace
     */
    public String tag(String sentence) {
        List<String> toks = new ArrayList<String>();
        StringTokenizer st = new StringTokenizer(sentence);
        while (st.hasMoreTokens()) {
            toks.add(st.nextToken());
        }
        List tags = this.tag(toks);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tags.size(); ++i) {
            sb.append(toks.get(i)).append("/").append(tags.get(i)).append(" ");
        }
        return sb.toString().trim();
    }

    /**
     * Evaluates {@code posModel} on annotated lines read from {@code r} and
     * prints token-level and sentence-level accuracy to standard output.
     *
     * <p>Each line is converted with
     * {@link POSEventCollector#convertAnnotatedString(String)} into words and
     * reference tags; the words are decoded and compared tag by tag. An
     * {@link IOException} while reading is reported via its stack trace and
     * the accuracy of the lines read so far is still printed. With no input
     * the ratios are {@code NaN}.
     *
     * @param posModel the model to evaluate; becomes this tagger's model
     * @param r        source of annotated lines, one sentence per line
     * @param e        unused
     * @param verbose  unused
     */
    public void localEval(MaxentModel posModel, Reader r, Evalable e, boolean verbose) {
        if (posModel != this.posModel) {
            this.posModel = posModel;
            if (posModel != null) {
                // The beam search captured the model it was constructed with;
                // without rebuilding it the evaluation would silently score
                // the previous model instead of the one passed in.
                this.beam = new PosBeamSearch(this.size, this.contextGen, posModel);
            }
        }
        // Integral counters: a float stops incrementing at 2^24, which a large
        // evaluation corpus can exceed.
        long total = 0L;
        long correct = 0L;
        long sentences = 0L;
        long sentsCorrect = 0L;
        BufferedReader br = new BufferedReader(r);
        try {
            String line;
            while ((line = br.readLine()) != null) {
                ++sentences;
                Pair p = POSEventCollector.convertAnnotatedString(line);
                List words = (List)p.a;
                List outcomes = (List)p.b;
                List tags = this.beam.bestSequence(words, null).getOutcomes();
                boolean sentOk = true;
                for (int c = 0; c < tags.size(); ++c) {
                    ++total;
                    String tag = (String)tags.get(c);
                    if (tag.equals(outcomes.get(c))) {
                        ++correct;
                    } else {
                        sentOk = false;
                    }
                }
                if (sentOk) {
                    ++sentsCorrect;
                }
            }
        }
        catch (IOException E) {
            E.printStackTrace();
        }
        // Ratios are computed in float so the printed format is unchanged.
        System.out.println("Accuracy         : " + ((float)correct / (float)total));
        System.out.println("Sentence Accuracy: " + ((float)sentsCorrect / (float)sentences));
    }

    /**
     * Returns all tags for the word at {@code index}, most probable first.
     *
     * @param words the sentence tokens
     * @param tags  the tags assigned so far
     * @param index position of the word to score
     * @return every model outcome, ordered by descending probability
     */
    public String[] getOrderedTags(List words, List tags, int index) {
        return this.getOrderedTags(words, tags, index, null);
    }

    /**
     * Returns all tags for the word at {@code index}, most probable first, and
     * optionally reports the matching probabilities.
     *
     * <p>Outcomes with equal probability keep the model's outcome order.
     *
     * @param words  the sentence tokens
     * @param tags   the tags assigned so far
     * @param index  position of the word to score
     * @param tprobs if non-null, receives the probability of each returned tag
     *               at the same position; must hold at least one entry per outcome
     * @return every model outcome, ordered by descending probability
     */
    public String[] getOrderedTags(List words, List tags, int index, double[] tprobs) {
        String[] priorTags = (String[]) tags.toArray(new String[tags.size()]);
        double[] probs = this.posModel.eval(this.contextGen.getContext(index, words.toArray(), priorTags, null));
        String[] orderedTags = new String[probs.length];
        // Track emitted outcomes explicitly. Overwriting a picked probability
        // with 0.0 made index 0 win again as soon as the remaining
        // probabilities were all zero, duplicating its tag, and it also
        // modified the array handed out by the model.
        boolean[] taken = new boolean[probs.length];
        for (int i = 0; i < probs.length; ++i) {
            int max = -1;
            for (int ti = 0; ti < probs.length; ++ti) {
                if (taken[ti]) {
                    continue;
                }
                if (max < 0 || probs[ti] > probs[max]) {
                    max = ti;
                }
            }
            taken[max] = true;
            orderedTags[i] = this.posModel.getOutcome(max);
            if (tprobs != null) {
                tprobs[i] = probs[max];
            }
        }
        return orderedTags;
    }

    /**
     * Trains a model with the default iteration count and cutoff and writes it
     * to {@code modelFile}.
     *
     * @param evc       the training events
     * @param modelFile destination of the persisted model
     * @throws IOException if training or writing fails
     */
    public static void train(EventStream evc, File modelFile) throws IOException {
        GISModel model = POSTaggerME.train(evc, DEFAULT_ITERATIONS, DEFAULT_CUTOFF);
        new SuffixSensitiveGISModelWriter(model, modelFile).persist();
    }

    /**
     * Trains a GIS model from the event stream, indexing the events with a
     * {@link TwoPassDataIndexer}.
     *
     * @param es         the training events
     * @param iterations number of GIS iterations
     * @param cut        cutoff passed to the data indexer
     * @return the trained model
     * @throws IOException if the events cannot be read
     */
    public static GISModel train(EventStream es, int iterations, int cut) throws IOException {
        return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
    }

    /** Prints the command-line help to standard error and exits with status 1. */
    private static void usage() {
        System.err.println("Usage: POSTaggerME [-encoding encoding] [-dict dict_file] training model [cutoff] [iterations]");
        System.err.println("This trains a new model on the specified training file and writes the trained model to the model file.");
        System.err.println("-encoding Specifies the encoding of the training file");
        System.err.println("-dict Specifies that a dictionary file should be created for use in distinguising between rare and non-rare words");
        System.exit(1);
    }

    /**
     * Opens the training file as text, using {@code encoding} when given and
     * the platform default otherwise.
     */
    private static Reader openTrainingReader(File inFile, String encoding) throws IOException {
        FileInputStream in = new FileInputStream(inFile);
        try {
            if (encoding == null) {
                return new InputStreamReader(in);
            }
            return new InputStreamReader((InputStream)in, encoding);
        }
        catch (IOException ex) {
            // An unsupported encoding name must not leak the open file.
            in.close();
            throw ex;
        }
    }

    /** Closes {@code c} if non-null, ignoring failures; for input streams only. */
    private static void closeQuietly(java.io.Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        }
        catch (IOException ignored) {
            // Nothing useful can be done about a failed close of an input.
        }
    }

    /**
     * Command-line trainer; see {@link #usage()} for the accepted arguments.
     *
     * <p>With {@code -dict}, a dictionary is first built from the words of the
     * training file (the text before the last {@code '_'} of each
     * space-separated token; a token without {@code '_'} makes
     * {@link String#substring(int, int)} fail) and saved to the given file,
     * then read back for training. Any exception is reported via its stack
     * trace.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) throws IOException, InvalidFormatException {
        if (args.length == 0) {
            POSTaggerME.usage();
        }
        int ai = 0;
        try {
            String encoding = null;
            String dict = null;
            // Bounds-checked so a command line consisting only of options
            // ends in the usage message instead of an index error.
            while (ai < args.length && args[ai].startsWith("-")) {
                if (args[ai].equals("-encoding")) {
                    if (++ai < args.length) {
                        encoding = args[ai++];
                    } else {
                        POSTaggerME.usage();
                    }
                } else if (args[ai].equals("-dict")) {
                    if (++ai < args.length) {
                        dict = args[ai++];
                    } else {
                        POSTaggerME.usage();
                    }
                } else {
                    System.err.println("Unknown option " + args[ai]);
                    POSTaggerME.usage();
                }
            }
            if (args.length - ai < 2) {
                // Both the training file and the model file are mandatory.
                POSTaggerME.usage();
                return;
            }
            File inFile = new File(args[ai++]);
            File outFile = new File(args[ai++]);
            int cutoff = DEFAULT_CUTOFF;
            int iterations = DEFAULT_ITERATIONS;
            // The two trailing arguments are independently optional.
            if (ai < args.length) {
                cutoff = Integer.parseInt(args[ai++]);
            }
            if (ai < args.length) {
                iterations = Integer.parseInt(args[ai++]);
            }
            if (dict != null) {
                System.err.println("Building dictionary");
                NGramModel ngramModel = new NGramModel();
                // Read with the same encoding as the training pass so both
                // see identical words.
                Reader dictSource = POSTaggerME.openTrainingReader(inFile, encoding);
                try {
                    PlainTextByLineDataStream data = new PlainTextByLineDataStream(dictSource);
                    while (data.hasNext()) {
                        String tagStr = (String)data.nextToken();
                        String[] tt = tagStr.split(" ");
                        Token[] words = new Token[tt.length];
                        for (int wi = 0; wi < words.length; ++wi) {
                            words[wi] = Token.create(tt[wi].substring(0, tt[wi].lastIndexOf('_')));
                        }
                        ngramModel.add(new TokenList(words), 1, 1);
                    }
                }
                finally {
                    POSTaggerME.closeQuietly(dictSource);
                }
                System.out.println("Saving the dictionary");
                ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
                Dictionary dictionary = ngramModel.toDictionary(true);
                FileOutputStream dictOut = new FileOutputStream(dict);
                try {
                    dictionary.serialize(dictOut);
                }
                finally {
                    // Closed before the file is reopened for reading below.
                    dictOut.close();
                }
            }
            Reader trainIn = null;
            FileInputStream dictIn = null;
            try {
                trainIn = POSTaggerME.openTrainingReader(inFile, encoding);
                PlainTextByLineDataStream lines = new PlainTextByLineDataStream(trainIn);
                POSEventStream es;
                if (dict == null) {
                    es = new POSEventStream(lines);
                } else {
                    dictIn = new FileInputStream(dict);
                    es = new POSEventStream((DataStream)lines, new Dictionary(dictIn));
                }
                GISModel mod = POSTaggerME.train(es, iterations, cutoff);
                System.out.println("Saving the model as: " + outFile);
                new SuffixSensitiveGISModelWriter(mod, outFile).persist();
            }
            finally {
                // Inputs stay open until training is done because the event
                // stream may still be reading from them.
                POSTaggerME.closeQuietly(dictIn);
                POSTaggerME.closeQuietly(trainIn);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Beam search that consults the enclosing tagger's tag dictionary to
     * reject tags a word is not allowed to take.
     */
    private class PosBeamSearch
    extends BeamSearch {
        PosBeamSearch(int size, POSContextGenerator cg, MaxentModel model) {
            super(size, cg, model);
        }

        PosBeamSearch(int size, POSContextGenerator cg, MaxentModel model, int cacheSize) {
            super(size, cg, model, cacheSize);
        }

        /**
         * Accepts {@code outcome} for token {@code i} unless a tag dictionary
         * is configured, has an entry for the token, and that entry does not
         * contain the outcome.
         */
        protected boolean validSequence(int i, Object[] inputSequence, String[] outcomesSequence, String outcome) {
            if (POSTaggerME.this.tagDictionary == null) {
                return true;
            }
            String[] tags = POSTaggerME.this.tagDictionary.getTags(inputSequence[i].toString());
            if (tags == null) {
                // Word not in the dictionary: no restriction applies.
                return true;
            }
            return Arrays.asList(tags).contains(outcome);
        }
    }
}

