package edu.mayo.bmi.nlp.parser;

import clear.ftr.FtrLib;
import clear.reader.AbstractReader;
import edu.mayo.bmi.nlp.parser.type.ConllDependencyNode;
import edu.mayo.bmi.uima.core.type.BaseToken;
import edu.mayo.bmi.uima.core.type.Sentence;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;

/* loaded from: input_file:edu/mayo/bmi/nlp/parser/DependencyFileCollectionReader.class */
/**
 * Collection reader that loads one tab-separated dependency file and exposes
 * its whole content as a single CAS document.
 *
 * <p>Every non-blank line describes one token; blank (whitespace-only) lines
 * separate sentences. Column 0 is the token id and column 1 the word form in
 * every format. The remaining columns depend on the configured input format,
 * which is matched by substring in this order:
 * <ul>
 *   <li>{@code tok}: tokens only, no dependency nodes are created;</li>
 *   <li>{@code min}: HEAD in column 2, DEPREL in column 3;</li>
 *   <li>{@code mpos}: POS in column 2, HEAD in 3, DEPREL in 4;</li>
 *   <li>{@code mlem}: lemma in column 2, HEAD in 3, DEPREL in 4;</li>
 *   <li>{@link AbstractReader#FORMAT_DEP}: lemma in 2, POS in 3, HEAD in 4,
 *       DEPREL in 5;</li>
 *   <li>anything else is read as CoNLL-X (lemma 2, CPOSTAG 3, POSTAG 4,
 *       FEATS 5, HEAD 6, DEPREL 7, PHEAD 8, PDEPREL 9); a warning is printed
 *       once if the format string does not contain
 *       {@link AbstractReader#FORMAT_CONLL}.</li>
 * </ul>
 *
 * <p>For each sentence the reader adds {@link BaseToken} and {@link Sentence}
 * annotations, and (except for {@code tok}) a root {@link ConllDependencyNode}
 * with id 0. In training mode it also adds one fully populated
 * {@link ConllDependencyNode} per token. The document text is the word forms,
 * each followed by {@link FtrLib#RULE_DELIM}.
 */
public class DependencyFileCollectionReader extends CollectionReader_ImplBase {
    public static final String DEPENDENCY_FILE_PARAM = "DependencyFile";
    public static final String INPUT_FORMAT_PARAM = "InputFormat";
    public static final String TRAINING_MODE_PARAM = "TrainingMode";

    /** Matches empty or whitespace-only lines, i.e. sentence separators. */
    private static final Pattern BLANK_LINE = Pattern.compile("\\A\\s*\\Z");

    /** Placeholder stored in (and read from) columns that carry no value. */
    private static final String UNAVAILABLE = "_";

    /**
     * Supported input layouts, together with the columns that feed the
     * {@link BaseToken} lemma and part of speech ({@code -1} = not present).
     */
    private enum Format {
        TOK(-1, -1),
        MIN(-1, -1),
        MPOS(-1, 2),
        MLEM(2, -1),
        DEP(2, 3),
        CONLL(2, 4);

        final int lemmaColumn;
        final int posColumn;

        Format(int lemmaColumn, int posColumn) {
            this.lemmaColumn = lemmaColumn;
            this.posColumn = posColumn;
        }
    }

    BufferedReader input;
    String inputFormat;
    String line = null;
    /** Number of times {@link #getNext(CAS)} has been invoked. */
    private int docCtr = 0;
    boolean trainingMode = false;
    /** Ensures the "assuming CoNLL-X" warning is printed at most once. */
    private boolean formatWarningIssued = false;

    /**
     * Reads the configuration parameters and opens the dependency file.
     *
     * <p>A missing {@value #INPUT_FORMAT_PARAM} falls back to the CoNLL-X
     * default and a missing {@value #TRAINING_MODE_PARAM} to {@code false}.
     * Parameters are validated before the file is opened so that a bad
     * configuration cannot leak an open reader.
     *
     * @throws ResourceInitializationException if the file parameter is missing
     *         or the file cannot be opened
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        Object fileParam = getConfigParameterValue(DEPENDENCY_FILE_PARAM);
        Object formatParam = getConfigParameterValue(INPUT_FORMAT_PARAM);
        Object trainingParam = getConfigParameterValue(TRAINING_MODE_PARAM);
        if (fileParam == null) {
            throw new ResourceInitializationException(new IllegalArgumentException(
                    "Missing required configuration parameter: " + DEPENDENCY_FILE_PARAM));
        }
        // Locale.ROOT keeps format matching independent of the default locale
        // (e.g. "MIN" must not become "m\u0131n" under a Turkish locale).
        this.inputFormat = formatParam == null ? "" : ((String) formatParam).toLowerCase(Locale.ROOT);
        this.trainingMode = trainingParam != null && ((Boolean) trainingParam).booleanValue();
        try {
            this.input = new BufferedReader(new FileReader((String) fileParam));
        } catch (FileNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Reads the complete dependency file into {@code cas}.
     *
     * <p>Sentences are delimited by blank lines. Leading and repeated blank
     * lines are ignored, and a final sentence that is not followed by a blank
     * line is still emitted. If the file is empty the CAS is left untouched.
     *
     * @param cas the CAS to populate
     * @throws IOException if reading the file fails
     * @throws CollectionException if the JCas view cannot be obtained
     */
    @Override
    public void getNext(CAS cas) throws IOException, CollectionException {
        this.docCtr++;
        try {
            JCas jCas = cas.getJCas();
            this.line = this.input.readLine();
            if (this.line == null) {
                return;
            }
            Format format = resolveFormat();
            StringBuffer text = new StringBuffer();
            ArrayList<String> sentenceLines = new ArrayList<>(50);
            int offset = 0;
            int tokenNumber = 0;
            int sentenceNumber = 0;
            do {
                if (BLANK_LINE.matcher(this.line).matches()) {
                    // A blank line closes the pending sentence; with nothing
                    // pending it is just padding and must not yield an empty
                    // (begin > end) Sentence annotation.
                    if (!sentenceLines.isEmpty()) {
                        offset = addSentence(jCas, format, sentenceLines, text, offset, tokenNumber, sentenceNumber);
                        tokenNumber += sentenceLines.size();
                        sentenceNumber++;
                        sentenceLines = new ArrayList<>(50);
                    }
                } else {
                    sentenceLines.add(this.line);
                }
                this.line = this.input.readLine();
            } while (this.line != null);
            // The file may end without a trailing blank line.
            if (!sentenceLines.isEmpty()) {
                addSentence(jCas, format, sentenceLines, text, offset, tokenNumber, sentenceNumber);
            }
            jCas.setDocumentText(text.toString());
        } catch (CASException e) {
            throw new CollectionException(e);
        }
    }

    /**
     * Determines the input layout from {@link #inputFormat} using the same
     * substring checks, in the same order, for every caller. Prints the
     * CoNLL-X fallback warning the first time an unrecognised format is seen.
     */
    private Format resolveFormat() {
        if (this.inputFormat.contains("tok")) {
            return Format.TOK;
        }
        if (this.inputFormat.contains("min")) {
            return Format.MIN;
        }
        if (this.inputFormat.contains("mpos")) {
            return Format.MPOS;
        }
        if (this.inputFormat.contains("mlem")) {
            return Format.MLEM;
        }
        if (this.inputFormat.contains(AbstractReader.FORMAT_DEP)) {
            return Format.DEP;
        }
        if (!this.inputFormat.contains(AbstractReader.FORMAT_CONLL) && !this.formatWarningIssued) {
            System.err.println("Warning: Assuming CONLL-x input format");
            this.formatWarningIssued = true;
        }
        return Format.CONLL;
    }

    /**
     * Adds the annotations for one sentence and appends its word forms to the
     * document text.
     *
     * @param jCas the JCas receiving the annotations
     * @param format the resolved input layout
     * @param sentenceLines the non-blank token lines of the sentence
     * @param text the document text accumulated so far; word forms are appended
     * @param sentenceBegin the character offset at which the sentence starts
     * @param firstTokenNumber the token number of the sentence's first token
     * @param sentenceNumber the number assigned to the sentence
     * @return the character offset at which the next sentence starts
     */
    private int addSentence(JCas jCas, Format format, ArrayList<String> sentenceLines, StringBuffer text,
            int sentenceBegin, int firstTokenNumber, int sentenceNumber) {
        boolean tokensOnly = format == Format.TOK;
        boolean createNodes = !tokensOnly && this.trainingMode;
        ArrayList<ConllDependencyNode> nodes = new ArrayList<>(sentenceLines.size() + 1);

        if (!tokensOnly) {
            // The artificial root (id 0) spans every word form plus one
            // separator character per token.
            int rootEnd = sentenceBegin;
            for (String sentenceLine : sentenceLines) {
                rootEnd += sentenceLine.split("\t")[1].length() + 1;
            }
            ConllDependencyNode root = new ConllDependencyNode(jCas, sentenceBegin, rootEnd);
            root.setID(0);
            root.addToIndexes(jCas);
            nodes.add(root);
        }

        int offset = sentenceBegin;
        int tokenEnd = sentenceBegin;
        int tokenNumber = firstTokenNumber;
        for (String sentenceLine : sentenceLines) {
            String[] columns = sentenceLine.split("\t");
            tokenEnd = offset + columns[1].length();
            if (createNodes) {
                nodes.add(new ConllDependencyNode(jCas, offset, tokenEnd));
            }
            BaseToken token = new BaseToken(jCas, offset, tokenEnd);
            token.setTokenNumber(tokenNumber++);
            if (format.lemmaColumn >= 0) {
                token.setNormalizedForm(columns[format.lemmaColumn]);
            }
            if (format.posColumn >= 0) {
                token.setPartOfSpeech(columns[format.posColumn]);
            }
            token.addToIndexes();
            text.append(columns[1] + FtrLib.RULE_DELIM);
            offset = tokenEnd + 1;
        }

        Sentence sentence = new Sentence(jCas, sentenceBegin, tokenEnd);
        sentence.setSentenceNumber(sentenceNumber);
        sentence.addToIndexes();

        if (createNodes) {
            setDependencyNodesFromTabbedText(jCas, sentenceLines, text, nodes);
        }
        return offset;
    }

    /**
     * Populates the per-token dependency nodes of one sentence from its
     * tab-separated lines and adds them to the CAS indexes.
     *
     * <p>Columns that the input format does not provide are set to "_", and
     * PHEAD to {@code null}. In CoNLL-X input a PHEAD of "_" is likewise
     * stored as {@code null} instead of being parsed as a number.
     *
     * @param jCas the JCas whose indexes receive the nodes
     * @param arrayList the token lines of the sentence
     * @param stringBuffer the document text buffer (not used here)
     * @param arrayList2 the sentence's nodes: the root at index 0 followed by
     *        one node per token line, in order
     */
    private void setDependencyNodesFromTabbedText(JCas jCas, ArrayList<String> arrayList, StringBuffer stringBuffer, ArrayList<ConllDependencyNode> arrayList2) {
        Format format = resolveFormat();
        // Index 0 is the root, so token nodes start at 1.
        int nodeIndex = 1;
        for (String row : arrayList) {
            String[] c = row.split("\t");
            ConllDependencyNode node = arrayList2.get(nodeIndex++);
            node.setID(Integer.parseInt(c[0]));
            node.setFORM(c[1]);
            switch (format) {
                case MIN:
                    fillNode(node, UNAVAILABLE, UNAVAILABLE, UNAVAILABLE, UNAVAILABLE,
                            headNode(arrayList2, c[2]), c[3], null, UNAVAILABLE);
                    break;
                case MPOS:
                    fillNode(node, UNAVAILABLE, c[2], c[2], UNAVAILABLE,
                            headNode(arrayList2, c[3]), c[4], null, UNAVAILABLE);
                    break;
                case MLEM:
                    fillNode(node, c[2], UNAVAILABLE, UNAVAILABLE, UNAVAILABLE,
                            headNode(arrayList2, c[3]), c[4], null, UNAVAILABLE);
                    break;
                case DEP:
                    fillNode(node, c[2], c[3], c[3], UNAVAILABLE,
                            headNode(arrayList2, c[4]), c[5], null, UNAVAILABLE);
                    break;
                default:
                    // CoNLL-X; PHEAD may legitimately be "_" (not available).
                    ConllDependencyNode projectiveHead =
                            UNAVAILABLE.equals(c[8]) ? null : headNode(arrayList2, c[8]);
                    fillNode(node, c[2], c[3], c[4], c[5],
                            headNode(arrayList2, c[6]), c[7], projectiveHead, c[9]);
                    break;
            }
            node.addToIndexes(jCas);
        }
    }

    /** Looks up the node whose position in the sentence equals the given head id. */
    private static ConllDependencyNode headNode(ArrayList<ConllDependencyNode> nodes, String headId) {
        return nodes.get(Integer.parseInt(headId));
    }

    /** Sets every feature of a dependency node other than its ID and FORM. */
    private static void fillNode(ConllDependencyNode node, String lemma, String cpostag, String postag,
            String feats, ConllDependencyNode head, String deprel, ConllDependencyNode phead, String pdeprel) {
        node.setLEMMA(lemma);
        node.setCPOSTAG(cpostag);
        node.setPOSTAG(postag);
        node.setFEATS(feats);
        node.setHEAD(head);
        node.setDEPREL(deprel);
        node.setPHEAD(phead);
        node.setPDEPREL(pdeprel);
    }

    /**
     * Closes the underlying file reader, if one was opened.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if (this.input != null) {
            this.input.close();
        }
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return always {@code null}
     */
    @Override
    public Progress[] getProgress() {
        return null;
    }

    /**
     * Reports whether the single document of this reader is still unread.
     * The check has no side effects, so it may be called repeatedly.
     *
     * @return {@code true} until {@link #getNext(CAS)} has been called once
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        return this.docCtr == 0;
    }
}
