/*
 * Decompiled with CFR 0.152.
 */
package edu.mayo.bmi.nlp.parser;

import edu.mayo.bmi.nlp.parser.type.ConllDependencyNode;
import edu.mayo.bmi.uima.core.type.BaseToken;
import edu.mayo.bmi.uima.core.type.Sentence;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;

/**
 * UIMA collection reader that loads one tab-separated dependency file and
 * emits its whole content as a single CAS.
 *
 * <p>Each non-blank line describes one token (column 0 = ID, column 1 = FORM,
 * remaining columns depend on the configured input format); blank lines
 * separate sentences. For every token a {@link BaseToken} is created, for
 * every sentence a {@link Sentence}. The document text is rebuilt by joining
 * the FORM columns, each followed by a single space.
 *
 * <p>For every format except "tok" a root {@link ConllDependencyNode} with
 * ID 0 is indexed per sentence. When training mode is enabled, one fully
 * populated {@link ConllDependencyNode} per token is indexed as well.
 *
 * <p>The format is selected by substring match on the lower-cased
 * {@code InputFormat} parameter, tested in this order: "tok", "min", "mpos",
 * "mlem", "dep"; anything else is read as CoNLL-X.
 */
public class DependencyFileCollectionReader
extends CollectionReader_ImplBase {
    public static final String DEPENDENCY_FILE_PARAM = "DependencyFile";
    public static final String INPUT_FORMAT_PARAM = "InputFormat";
    public static final String TRAINING_MODE_PARAM = "TrainingMode";
    BufferedReader input;
    String line = null;
    private int docCtr = 0;
    String inputFormat;
    boolean trainingMode = false;

    /**
     * Column positions of the optional fields for each supported input format.
     * A value of {@code -1} means the format does not carry that field.
     * Columns 0 (ID) and 1 (FORM) are the same for every format.
     */
    private enum Layout {
        //    lemma cpos  pos feats head deprel phead pdeprel
        TOK(   -1,  -1,  -1,  -1,  -1,  -1,   -1,   -1),
        MIN(   -1,  -1,  -1,  -1,   2,   3,   -1,   -1),
        MPOS(  -1,   2,   2,  -1,   3,   4,   -1,   -1),
        MLEM(   2,  -1,  -1,  -1,   3,   4,   -1,   -1),
        DEP(    2,   3,   3,  -1,   4,   5,   -1,   -1),
        CONLL(  2,   3,   4,   5,   6,   7,    8,    9);

        final int lemma;
        final int cpostag;
        final int postag;
        final int feats;
        final int head;
        final int deprel;
        final int phead;
        final int pdeprel;

        Layout(int lemma, int cpostag, int postag, int feats, int head, int deprel, int phead, int pdeprel) {
            this.lemma = lemma;
            this.cpostag = cpostag;
            this.postag = postag;
            this.feats = feats;
            this.head = head;
            this.deprel = deprel;
            this.phead = phead;
            this.pdeprel = pdeprel;
        }

        /**
         * Picks the layout for a lower-cased format name. The test order
         * matters because matching is by substring; unknown names fall back
         * to CoNLL-X with a warning on stderr.
         */
        static Layout forFormat(String format) {
            if (format.contains("tok")) {
                return TOK;
            }
            if (format.contains("min")) {
                return MIN;
            }
            if (format.contains("mpos")) {
                return MPOS;
            }
            if (format.contains("mlem")) {
                return MLEM;
            }
            if (format.contains("dep")) {
                return DEP;
            }
            if (!format.contains("conll")) {
                System.err.println("Warning: Assuming CONLL-x input format");
            }
            return CONLL;
        }
    }

    /**
     * Reads the configuration parameters and opens the dependency file.
     *
     * <p>Parameters are validated before the file is opened so that a bad
     * configuration cannot leak an open reader. A missing {@code InputFormat}
     * falls back to CoNLL-X (with a warning at read time); a missing
     * {@code TrainingMode} is treated as {@code false}.
     *
     * @throws ResourceInitializationException if the {@code DependencyFile}
     *         parameter is missing or the file cannot be found
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        String dependencyFile = (String)this.getConfigParameterValue(DEPENDENCY_FILE_PARAM);
        if (dependencyFile == null) {
            throw new ResourceInitializationException(
                new IllegalArgumentException("Missing required configuration parameter: " + DEPENDENCY_FILE_PARAM));
        }
        String format = (String)this.getConfigParameterValue(INPUT_FORMAT_PARAM);
        this.inputFormat = format == null ? "" : format.toLowerCase();
        Boolean training = (Boolean)this.getConfigParameterValue(TRAINING_MODE_PARAM);
        this.trainingMode = training != null && training.booleanValue();
        try {
            this.input = new BufferedReader(new FileReader(dependencyFile));
        }
        catch (FileNotFoundException fnfe) {
            throw new ResourceInitializationException(fnfe);
        }
    }

    /**
     * Reads the remainder of the input file into {@code cas}.
     *
     * <p>Blank lines end the current sentence; runs of blank lines (including
     * leading ones) are skipped rather than producing empty sentences. A final
     * sentence that is not followed by a blank line is still emitted. If the
     * file yields no lines at all, the CAS is left without document text.
     *
     * @param cas the CAS to populate
     * @throws IOException if reading the file fails
     * @throws CollectionException if the JCas view cannot be obtained
     */
    @Override
    public void getNext(CAS cas) throws IOException, CollectionException {
        // Mark the single document as handed out; hasNext() itself stays side-effect free.
        ++this.docCtr;
        JCas jCas;
        try {
            jCas = cas.getJCas();
        }
        catch (CASException ce) {
            throw new CollectionException(ce);
        }

        StringBuilder documentText = new StringBuilder();
        ArrayList<String> sentenceLines = new ArrayList<String>(50);
        Layout layout = null;
        int tokenCount = 0;
        int sentNumber = 0;
        boolean sawAnyLine = false;

        while ((this.line = this.input.readLine()) != null) {
            sawAnyLine = true;
            if (!isBlank(this.line)) {
                sentenceLines.add(this.line);
                continue;
            }
            if (sentenceLines.isEmpty()) {
                // Leading or repeated separator: nothing to flush.
                continue;
            }
            if (layout == null) {
                layout = Layout.forFormat(this.inputFormat == null ? "" : this.inputFormat);
            }
            this.addSentence(jCas, layout, sentenceLines, documentText, sentNumber, tokenCount);
            tokenCount += sentenceLines.size();
            ++sentNumber;
            sentenceLines = new ArrayList<String>(50);
        }

        // Last sentence when the file does not end with a blank line.
        if (!sentenceLines.isEmpty()) {
            if (layout == null) {
                layout = Layout.forFormat(this.inputFormat == null ? "" : this.inputFormat);
            }
            this.addSentence(jCas, layout, sentenceLines, documentText, sentNumber, tokenCount);
        }

        if (sawAnyLine) {
            jCas.setDocumentText(documentText.toString());
        }
    }

    /**
     * Creates the annotations for one sentence and appends its tokens to the
     * document text.
     *
     * <p>Offsets are derived from {@code documentText.length()}: every token
     * is appended as FORM plus one space, so the current length is always the
     * begin offset of the next token.
     */
    private void addSentence(JCas jCas, Layout layout, ArrayList<String> sentenceLines,
            StringBuilder documentText, int sentNumber, int firstTokenNumber) {
        int sentStart = documentText.length();
        int sentEnd = sentStart;
        String[][] rows = new String[sentenceLines.size()][];
        for (int r = 0; r < rows.length; ++r) {
            rows[r] = sentenceLines.get(r).split("\t");
            // Root span covers every FORM plus its trailing space.
            sentEnd += rows[r][1].length() + 1;
        }

        boolean hasDependencies = layout != Layout.TOK;
        boolean buildTree = hasDependencies && this.trainingMode;
        ArrayList<ConllDependencyNode> depNodes = new ArrayList<ConllDependencyNode>(rows.length + 1);
        if (hasDependencies) {
            // Index 0 is the artificial root so that HEAD values index depNodes directly.
            ConllDependencyNode root = new ConllDependencyNode(jCas, sentStart, sentEnd);
            root.setID(0);
            root.addToIndexes(jCas);
            depNodes.add(root);
        }

        int tokenNumber = firstTokenNumber;
        int wordEnd = sentStart;
        for (String[] fields : rows) {
            int wordStart = documentText.length();
            wordEnd = wordStart + fields[1].length();
            if (buildTree) {
                // Created now, populated and indexed later once all nodes of the sentence exist.
                depNodes.add(new ConllDependencyNode(jCas, wordStart, wordEnd));
            }
            BaseToken token = new BaseToken(jCas, wordStart, wordEnd);
            token.setTokenNumber(tokenNumber++);
            if (layout.lemma >= 0) {
                token.setNormalizedForm(fields[layout.lemma]);
            }
            if (layout.postag >= 0) {
                token.setPartOfSpeech(fields[layout.postag]);
            }
            token.addToIndexes();
            documentText.append(fields[1]).append(' ');
        }

        // The sentence ends at the last token, excluding the trailing space.
        Sentence sentence = new Sentence(jCas, sentStart, wordEnd);
        sentence.setSentenceNumber(sentNumber);
        sentence.addToIndexes();

        if (buildTree) {
            populateDependencyNodes(jCas, layout, rows, depNodes);
        }
    }

    /**
     * Fills in and indexes the per-token dependency nodes of one sentence.
     * {@code depNodes.get(0)} is the root; {@code depNodes.get(r + 1)} belongs
     * to {@code rows[r]}. Fields the format does not provide are set to "_"
     * (or {@code null} for PHEAD).
     */
    private static void populateDependencyNodes(JCas jCas, Layout layout, String[][] rows,
            ArrayList<ConllDependencyNode> depNodes) {
        for (int r = 0; r < rows.length; ++r) {
            String[] fields = rows[r];
            ConllDependencyNode node = depNodes.get(r + 1);
            node.setID(Integer.parseInt(fields[0]));
            node.setFORM(fields[1]);
            node.setLEMMA(field(fields, layout.lemma));
            node.setCPOSTAG(field(fields, layout.cpostag));
            node.setPOSTAG(field(fields, layout.postag));
            node.setFEATS(field(fields, layout.feats));
            node.setHEAD(depNodes.get(Integer.parseInt(fields[layout.head])));
            node.setDEPREL(fields[layout.deprel]);
            ConllDependencyNode projectiveHead =
                layout.phead < 0 ? null : optionalNode(depNodes, fields[layout.phead]);
            node.setPHEAD(projectiveHead);
            node.setPDEPREL(field(fields, layout.pdeprel));
            node.addToIndexes(jCas);
        }
    }

    /** Returns the column value, or the placeholder "_" when the format lacks the column. */
    private static String field(String[] fields, int column) {
        return column < 0 ? "_" : fields[column];
    }

    /** Resolves a node reference column; the placeholder "_" means "no node". */
    private static ConllDependencyNode optionalNode(ArrayList<ConllDependencyNode> depNodes, String value) {
        if ("_".equals(value)) {
            return null;
        }
        return depNodes.get(Integer.parseInt(value));
    }

    /**
     * True when {@code text} is empty or consists only of the characters
     * matched by the regex class {@code \s} (space, tab, LF, VT, FF, CR).
     */
    private static boolean isBlank(String text) {
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            boolean whitespace = c == ' ' || c == '\t' || c == '\n' || c == 0x0B || c == '\f' || c == '\r';
            if (!whitespace) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the underlying file reader, if one was opened.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if (this.input != null) {
            this.input.close();
        }
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return always {@code null}; callers must handle that
     */
    @Override
    public Progress[] getProgress() {
        return null;
    }

    /**
     * Reports whether the document is still to be read. This check has no
     * side effects, so it may be called any number of times before
     * {@link #getNext(CAS)}.
     *
     * @return {@code true} until {@link #getNext(CAS)} has been called, or
     *         while a previous read left an unconsumed line behind
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return this.docCtr == 0 || this.line != null;
    }
}

