/*
 * Decompiled with CFR 0.152.
 */
package edu.mayo.bmi.uima.pos_tagger;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Command-line utility that builds a tag dictionary from part-of-speech
 * training data.
 *
 * <p>The training data is read line by line; each line is split on single
 * spaces into tokens of the form {@code WORD_TAG}, where the last underscore in
 * the token separates the word from its tag. The resulting dictionary maps
 * every word to the set of tags it was seen with and is written as one line per
 * word: the word followed by its tags, all separated by single spaces.
 */
public class TagDictionaryCreator {
    /** Character separating the word from the tag inside a training token. */
    private static final char TAG_SEPARATOR = '_';

    /**
     * Writes the dictionary to {@code out}, one {@code word tag1 tag2 ...} line
     * per entry.
     *
     * <p>Words and the tags on each line are emitted in natural (sorted) order
     * so that the same input always produces the same output file.
     *
     * @param dict mapping from word to the set of tags observed for that word
     * @param out  destination stream; it is flushed but not closed
     * @throws IOException if the stream reports a write error
     */
    private static void writeDictionary(Map<String, Set<String>> dict, PrintStream out) throws IOException {
        String[] words = dict.keySet().toArray(new String[dict.size()]);
        Arrays.sort(words);
        for (String word : words) {
            Set<String> tagSet = dict.get(word);
            String[] tags = tagSet.toArray(new String[tagSet.size()]);
            Arrays.sort(tags);
            StringBuilder line = new StringBuilder(word);
            for (String tag : tags) {
                line.append(' ').append(tag);
            }
            out.println(line.toString());
        }
        // PrintStream never throws on write failure; checkError() flushes and
        // reports whether any write went wrong, so surface that to the caller.
        if (out.checkError()) {
            throw new IOException("Error writing tag dictionary");
        }
    }

    /**
     * Builds a word-to-tags dictionary from training data.
     *
     * <p>Each line is split on single spaces. Empty tokens (from blank lines or
     * consecutive spaces) are ignored. A non-empty token that has no underscore,
     * or whose last underscore is its final character, is reported on
     * {@code System.err} and skipped.
     *
     * @param br            reader positioned at the start of the training data;
     *                      it is read to the end but not closed
     * @param caseSensitive when {@code false}, words are lower-cased before
     *                      being used as dictionary keys
     * @return mapping from each word to the set of tags it occurred with
     * @throws IOException if reading from {@code br} fails
     */
    public static HashMap<String, Set<String>> createTagDictionary(BufferedReader br, boolean caseSensitive) throws IOException {
        HashMap<String, Set<String>> dict = new HashMap<String, Set<String>>(50000);
        String line;
        while ((line = br.readLine()) != null) {
            for (String token : line.split(" ")) {
                if (token.length() == 0) {
                    // Artifact of splitting on a single space; not a malformed token.
                    continue;
                }
                int position = token.lastIndexOf(TAG_SEPARATOR);
                if (position == -1 || position == token.length() - 1) {
                    System.err.println("WARNING: '" + token + "' does not conform to the format WORD_TAG");
                    continue;
                }
                String word = token.substring(0, position);
                if (!caseSensitive) {
                    // Locale.ROOT keeps the keys independent of the JVM's default locale.
                    word = word.toLowerCase(java.util.Locale.ROOT);
                }
                String tag = token.substring(position + 1);
                Set<String> tagSet = dict.get(word);
                if (tagSet == null) {
                    tagSet = new HashSet<String>();
                    dict.put(word, tagSet);
                }
                tagSet.add(tag);
            }
        }
        return dict;
    }

    /**
     * Opens {@code filename} for buffered reading.
     *
     * @param filename path of the file to read
     * @return a buffered reader over the file; the caller must close it
     * @throws FileNotFoundException if the file cannot be opened; a message is
     *                               also printed to {@code System.err}
     */
    private static BufferedReader getBufferedReader(String filename) throws FileNotFoundException {
        try {
            return new BufferedReader(new FileReader(new File(filename)));
        }
        catch (FileNotFoundException e) {
            System.err.println("Error reading from file " + filename);
            throw e;
        }
    }

    /**
     * Returns whether the first argument asks for the usage text.
     */
    private static boolean isHelpRequest(String[] args) {
        if (args == null || args.length == 0 || args[0] == null) {
            return false;
        }
        String first = args[0].trim();
        return first.equals("-h") || first.equals("--help");
    }

    /**
     * Returns whether exactly three arguments were given and none is null or blank.
     */
    private static boolean hasThreeNonEmptyArguments(String[] args) {
        if (args == null || args.length != 3) {
            return false;
        }
        for (String arg : args) {
            if (arg == null || arg.trim().length() == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Entry point: reads the training data, builds the tag dictionary and
     * writes it to the output file.
     *
     * @param args {@code <training-data> <tag-dictionary> <case-sensitive>},
     *             or {@code -h} / {@code --help} as the first argument to print
     *             the usage text
     */
    public static void main(String[] args) {
        if (TagDictionaryCreator.isHelpRequest(args)) {
            TagDictionaryCreator.printUsage();
            return;
        }
        if (!TagDictionaryCreator.hasThreeNonEmptyArguments(args)) {
            System.err.println("ERROR: three non-empty arguments are required.");
            TagDictionaryCreator.printUsage();
            return;
        }
        String trainingDataFn = args[0];
        String tagDictFn = args[1];
        String caseSensitiveArg = args[2];
        try {
            boolean caseSensitive = Boolean.parseBoolean(caseSensitiveArg);

            // Read the input completely before touching the output file, so a
            // missing or unreadable training file does not create or truncate
            // the tag dictionary.
            HashMap<String, Set<String>> tagDictionary;
            BufferedReader br = TagDictionaryCreator.getBufferedReader(trainingDataFn);
            try {
                tagDictionary = TagDictionaryCreator.createTagDictionary(br, caseSensitive);
            }
            finally {
                br.close();
            }

            PrintStream out = new PrintStream(tagDictFn);
            try {
                TagDictionaryCreator.writeDictionary(tagDictionary, out);
            }
            finally {
                out.close();
            }
            System.out.println("TagDictionary written to " + tagDictFn);
        }
        catch (IOException e) {
            System.err.println("TagDictionaryCreator Failed");
            System.err.println("cause = " + e);
            System.err.println("training-data = " + trainingDataFn);
            System.err.println("tag-dictionary = " + tagDictFn);
            System.err.println("case-sensitive = " + caseSensitiveArg);
            System.err.flush();
            TagDictionaryCreator.printUsage();
            File f = new File(trainingDataFn);
            System.err.println("training-data absolute path = " + f.getAbsolutePath());
        }
    }

    /**
     * Prints the command-line usage text to {@code System.out}.
     */
    public static void printUsage() {
        System.out.println("Usage: java TagDictionaryCreator <training-data> <tag-dictionary> <case-sensitive>");
        System.out.println("  where <training-data> is a file prepared for training the part-of-speech tagger as described in data/pos/training/README");
        System.out.println("  where <tag-dictionary> is the output file where the tag dictionary will be written");
        System.out.println("  where <case-sensitive> is either 'true' or 'false' depending on whether the tag dictionary should be case sensitive or not.");
    }
}

