/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.lang.english;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import opennlp.maxent.io.SuffixSensitiveGISModelReader;
import opennlp.tools.lang.english.HeadRules;
import opennlp.tools.lang.english.ParserChunker;
import opennlp.tools.lang.english.ParserTagger;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.Parser;
import opennlp.tools.util.Span;

/**
 * Static helpers and a command-line entry point for parsing
 * whitespace-tokenized sentences with a chunking {@link Parser} whose models
 * are loaded from a data directory.
 */
public class TreebankParser {
    /** Matches a bracket character that directly follows a non-space character. */
    private static final Pattern untokenizedParenPattern1 = Pattern.compile("([^ ])([({)}])");
    /** Matches a bracket character that is directly followed by a non-space character. */
    private static final Pattern untokenizedParenPattern2 = Pattern.compile("([({)}])([^ ])");

    /**
     * Builds a chunking parser from the model files found in {@code dataDir}.
     *
     * <p>The files read are {@code build.bin.gz}, {@code check.bin.gz},
     * {@code tag.bin.gz}, {@code chunk.bin.gz} and {@code head_rules}; the
     * {@code tagdict} file is additionally handed to the tagger when
     * {@code useTagDictionary} is set.
     *
     * @param dataDir directory containing the model files
     * @param useTagDictionary whether the tagger is given the {@code tagdict} file
     * @param useCaseSensitiveTagDictionary passed to the tagger together with the
     *        tag dictionary; ignored when {@code useTagDictionary} is {@code false}
     * @param beamSize beam size handed to the parser
     * @param advancePercentage advance percentage handed to the parser
     * @return the configured parser
     * @throws IOException if a model cannot be loaded
     */
    public static Parser getParser(String dataDir, boolean useTagDictionary, boolean useCaseSensitiveTagDictionary, int beamSize, double advancePercentage) throws IOException {
        // Arguments are evaluated left to right, so the models are still loaded in
        // the order build, check, tag, chunk, head rules.
        return new opennlp.tools.parser.chunking.Parser(
                new SuffixSensitiveGISModelReader(new File(dataDir + "/build.bin.gz")).getModel(),
                new SuffixSensitiveGISModelReader(new File(dataDir + "/check.bin.gz")).getModel(),
                TreebankParser.createTagger(dataDir, useTagDictionary, useCaseSensitiveTagDictionary),
                new ParserChunker(dataDir + "/chunk.bin.gz"),
                new HeadRules(dataDir + "/head_rules"),
                beamSize,
                advancePercentage);
    }

    /**
     * Builds a chunking parser from {@code dataDir} with a case-sensitive tag
     * dictionary, a beam size of 20 and an advance percentage of 0.95.
     *
     * @param dataDir directory containing the model files
     * @return the configured parser
     * @throws IOException if a model cannot be loaded
     */
    public static Parser getParser(String dataDir) throws IOException {
        return TreebankParser.getParser(dataDir, true, true, 20, 0.95);
    }

    /** Creates the tagger for {@link #getParser}, with or without the tag dictionary file. */
    private static ParserTagger createTagger(String dataDir, boolean useTagDictionary, boolean caseSensitive) throws IOException {
        if (useTagDictionary) {
            return new ParserTagger(dataDir + "/tag.bin.gz", dataDir + "/tagdict", caseSensitive);
        }
        return new ParserTagger(dataDir + "/tag.bin.gz", null);
    }

    /** Replaces a bare bracket token with its -LRB-/-RRB-/-LCB-/-RCB- escape; other tokens pass through. */
    private static String convertToken(String token) {
        if (token.equals("(")) {
            return "-LRB-";
        }
        if (token.equals(")")) {
            return "-RRB-";
        }
        if (token.equals("{")) {
            return "-LCB-";
        }
        if (token.equals("}")) {
            return "-RCB-";
        }
        return token;
    }

    /**
     * Parses one line of whitespace-separated tokens.
     *
     * <p>Brackets that touch a neighbouring character are first split off with a
     * space, and bare bracket tokens are replaced by their escape strings. The
     * tokens are then joined with single spaces and handed to {@code parser}.
     *
     * @param line the sentence to parse
     * @param parser the parser to run
     * @param numParses number of parses requested; when it is 1 the single-result
     *        {@code parse} method of the parser is used
     * @return the parses produced by {@code parser}, or an empty array when
     *         {@code line} contains no tokens
     */
    public static Parse[] parseLine(String line, Parser parser, int numParses) {
        line = untokenizedParenPattern1.matcher(line).replaceAll("$1 $2");
        line = untokenizedParenPattern2.matcher(line).replaceAll("$1 $2");
        StringTokenizer str = new StringTokenizer(line);
        if (!str.hasMoreTokens()) {
            // Nothing to parse: blank input used to fail with a
            // StringIndexOutOfBoundsException while trimming the joined text.
            return new Parse[0];
        }
        StringBuilder sb = new StringBuilder();
        ArrayList<String> tokens = new ArrayList<String>();
        while (str.hasMoreTokens()) {
            String tok = TreebankParser.convertToken(str.nextToken());
            tokens.add(tok);
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(tok);
        }
        String text = sb.toString();
        Parse p = new Parse(text, new Span(0, text.length()), "INC", 1.0, 0);
        int start = 0;
        int i = 0;
        for (String tok : tokens) {
            p.insert(new Parse(text, new Span(start, start + tok.length()), "TK", 0.0, i));
            // Skip the token and the single space that separates it from the next one.
            start += tok.length() + 1;
            ++i;
        }
        if (numParses == 1) {
            return new Parse[]{parser.parse(p)};
        }
        return parser.parse(p, numParses);
    }

    /** Prints the command-line help to standard error and terminates the JVM with status 1. */
    private static void usage() {
        System.err.println("Usage: TreebankParser [-d -i -bs n -ap f -type t] dataDirectory < tokenized_sentences");
        System.err.println("dataDirectory: Directory containing parser models.");
        System.err.println("-type [chunking|insertion]: Type of parser to use.");
        System.err.println("-d: Use tag dictionary.");
        System.err.println("-i: Case insensitive tag dictionary.");
        System.err.println("-bs 20: Use a beam size of 20.");
        System.err.println("-ap 0.95: Advance outcomes in with at least 95% of the probability mass.");
        System.err.println("-k 5: Show the top 5 parses.  This will also display their log-probablities.");
        System.exit(1);
    }

    /**
     * Returns the argument following the option at {@code optionIndex}; prints the
     * usage text and exits when the option is the last argument.
     */
    private static String optionValue(String[] args, int optionIndex) {
        if (args.length <= optionIndex + 1) {
            TreebankParser.usage();
        }
        return args[optionIndex + 1];
    }

    /**
     * Command-line entry point: parses the options, loads the parser from the
     * data directory and prints the parse(s) of every input line.
     *
     * <p>Input is read from the file named after the data directory if one is
     * given, otherwise from standard input. A line without tokens produces an
     * empty output line.
     *
     * @param args command-line arguments, see {@link #usage()}
     * @throws IOException if the models or the input file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            TreebankParser.usage();
        }
        boolean useTagDictionary = false;
        boolean caseSensitiveTagDictionary = true;
        boolean showTopK = false;
        String parserType = "chunking";
        int numParses = 1;
        int ai = 0;
        int beamSize = 20;
        double advancePercentage = 0.95;
        // The bounds check keeps an options-only command line from running off
        // the end of the argument array.
        while (ai < args.length && args[ai].startsWith("-")) {
            String option = args[ai];
            if (option.equals("-d")) {
                useTagDictionary = true;
            } else if (option.equals("-i")) {
                caseSensitiveTagDictionary = false;
            } else if (option.equals("-di") || option.equals("-id")) {
                useTagDictionary = true;
                caseSensitiveTagDictionary = false;
            } else if (option.equals("-bs")) {
                try {
                    beamSize = Integer.parseInt(TreebankParser.optionValue(args, ai));
                    ++ai;
                }
                catch (NumberFormatException nfe) {
                    System.err.println(nfe);
                    TreebankParser.usage();
                }
            } else if (option.equals("-ap")) {
                try {
                    advancePercentage = Double.parseDouble(TreebankParser.optionValue(args, ai));
                    ++ai;
                }
                catch (NumberFormatException nfe) {
                    System.err.println(nfe);
                    TreebankParser.usage();
                }
            } else if (option.equals("-k")) {
                showTopK = true;
                try {
                    numParses = Integer.parseInt(TreebankParser.optionValue(args, ai));
                    ++ai;
                }
                catch (NumberFormatException nfe) {
                    System.err.println(nfe);
                    TreebankParser.usage();
                }
            } else if (option.equals("-type")) {
                parserType = TreebankParser.optionValue(args, ai);
                ++ai;
                if (!parserType.equals("chunking") && !parserType.equals("insertion")) {
                    TreebankParser.usage();
                }
            } else {
                if (option.equals("--")) {
                    // End-of-options marker: step over it and stop option parsing.
                    ++ai;
                    break;
                }
                System.err.println("Unknown option " + option);
                TreebankParser.usage();
            }
            ++ai;
        }
        if (ai >= args.length) {
            // No data directory was supplied after the options.
            TreebankParser.usage();
            return;
        }
        if (!parserType.equals("chunking")) {
            // Only the chunking parser is constructed by this class; fail with a
            // clear message instead of running with a null parser.
            System.err.println("Parser type '" + parserType + "' is not supported by this class; use 'chunking'.");
            System.exit(1);
            return;
        }
        // A case-insensitive dictionary request (-i) implies using the tag
        // dictionary; the case flag is ignored when no dictionary is used.
        boolean withTagDictionary = useTagDictionary || !caseSensitiveTagDictionary;
        Parser parser = TreebankParser.getParser(args[ai++], withTagDictionary, caseSensitiveTagDictionary, beamSize, advancePercentage);
        BufferedReader in = ai == args.length ? new BufferedReader(new InputStreamReader(System.in)) : new BufferedReader(new FileReader(args[ai]));
        try {
            String line;
            while (null != (line = in.readLine())) {
                if (!new StringTokenizer(line).hasMoreTokens()) {
                    // Empty or whitespace-only line: echo an empty line.
                    System.out.println();
                    continue;
                }
                Parse[] parses = TreebankParser.parseLine(line, parser, numParses);
                for (int pi = 0; pi < parses.length; ++pi) {
                    if (showTopK) {
                        System.out.print(pi + " " + parses[pi].getProb() + " ");
                    }
                    parses[pi].show();
                }
            }
        }
        catch (IOException e) {
            System.err.println(e);
        }
        finally {
            in.close();
        }
    }
}

