package edu.mayo.bmi.uima.pos_tagger;

import clear.ftr.FtrLib;
import edu.mayo.bmi.uima.core.type.BaseToken;
import edu.mayo.bmi.uima.core.type.Sentence;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;

/* loaded from: input_file:edu/mayo/bmi/uima/pos_tagger/OpenNLPPOSCollectionReader.class */
/**
 * Collection reader that fills one CAS per line of a part-of-speech data file.
 *
 * <p>Each line is split on {@code FtrLib.RULE_DELIM}; every resulting item must have the form
 * {@code WORD_TAG}, where the tag is whatever follows the last underscore that is not the final
 * character of the item. For each item a {@link BaseToken} is added to the CAS, and a single
 * {@link Sentence} spanning from offset 0 to the end of the last token is added per line. The
 * document text of the CAS is rebuilt from the words, each followed by the delimiter.
 */
public class OpenNLPPOSCollectionReader extends CollectionReader_ImplBase {
    /** Name of the configuration parameter holding the path of the data file to read. */
    public static final String POS_DATA_FILE_PARAM = "PosDataFile";

    /**
     * Name of the optional Boolean configuration parameter; when {@code true} the tokens are
     * created without a part-of-speech value. Treated as {@code false} when absent.
     */
    public static final String LOAD_WORDS_ONLY_PARAM = "LoadWordsOnly";

    /** Reader over the data file; opened by {@link #initialize()}. */
    BufferedReader input;

    /** Line read ahead by {@link #hasNext()} and not yet consumed, or {@code null} if none. */
    String line = null;

    /** Whether part-of-speech tags are left off the created tokens. */
    boolean loadWordsOnly;

    /**
     * Opens the data file named by {@link #POS_DATA_FILE_PARAM} and reads
     * {@link #LOAD_WORDS_ONLY_PARAM}.
     *
     * @throws ResourceInitializationException if the data file parameter is not set or the file
     *         cannot be found
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        String dataFile = (String) getConfigParameterValue(POS_DATA_FILE_PARAM);
        if (dataFile == null) {
            // Report a missing parameter explicitly instead of letting FileReader fail with a
            // bare NullPointerException.
            throw new ResourceInitializationException(new IllegalArgumentException(
                    "Required configuration parameter " + POS_DATA_FILE_PARAM + " is not set."));
        }
        try {
            // NOTE: FileReader decodes the file with the JVM's default charset.
            this.input = new BufferedReader(new FileReader(dataFile));
        } catch (FileNotFoundException e) {
            throw new ResourceInitializationException(e);
        }
        Boolean wordsOnly = (Boolean) getConfigParameterValue(LOAD_WORDS_ONLY_PARAM);
        this.loadWordsOnly = wordsOnly != null && wordsOnly.booleanValue();
    }

    /**
     * Consumes the next line of the data file and populates {@code cas} with its tokens, one
     * sentence annotation and the document text. Does nothing if no line is left.
     *
     * <p>The buffered line is always discarded when this method returns or throws, so a line
     * that fails to load is not served again by the following call.
     *
     * @param cas the CAS to populate
     * @throws IOException if reading the data file fails
     * @throws CollectionException if an item of the line is not of the form {@code WORD_TAG}
     *         (this includes empty items), or if the JCas view of {@code cas} cannot be obtained
     */
    @Override
    public void getNext(CAS cas) throws IOException, CollectionException {
        try {
            if (!hasNext()) {
                return;
            }
            JCas jCas = cas.getJCas();
            String delimiter = FtrLib.RULE_DELIM;
            StringBuilder documentText = new StringBuilder();
            int begin = 0;
            int end = 0;
            int tokenNumber = 0;
            for (String wordAndTag : this.line.split(delimiter)) {
                int separator = wordAndTag.lastIndexOf("_");
                // A trailing underscore belongs to the tag itself (e.g. "__"), so look for the
                // separator before it. The -1 check keeps an empty item from reaching
                // substring(0, -1); it falls through to the format error below instead.
                if (separator != -1 && separator == wordAndTag.length() - 1) {
                    separator = wordAndTag.substring(0, separator).lastIndexOf("_");
                }
                if (separator == -1) {
                    throw new CollectionException("There is a problem in your training data: " + wordAndTag + " does not conform to the format WORD_TAG.", null);
                }
                String word = wordAndTag.substring(0, separator);
                end = begin + word.length();
                BaseToken token = new BaseToken(jCas, begin, end);
                if (!this.loadWordsOnly) {
                    token.setPartOfSpeech(wordAndTag.substring(separator + 1));
                }
                token.setTokenNumber(tokenNumber++);
                token.addToIndexes();
                documentText.append(word).append(delimiter);
                // The next word starts right after the delimiter just appended to the text.
                begin = end + delimiter.length();
            }
            Sentence sentence = new Sentence(jCas, 0, end);
            sentence.setSentenceNumber(0);
            sentence.addToIndexes();
            jCas.setDocumentText(documentText.toString());
        } catch (CASException e) {
            throw new CollectionException(e);
        } finally {
            // Consume the line on every exit path so the reader always advances.
            this.line = null;
        }
    }

    /**
     * Closes the data file reader, if one was opened.
     *
     * @throws IOException if closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        if (this.input != null) {
            this.input.close();
        }
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return always {@code null}
     */
    @Override
    public Progress[] getProgress() {
        return null;
    }

    /**
     * Reads one line ahead, unless a line is already buffered, and reports whether one is
     * available.
     *
     * @return {@code true} if a line is buffered for the next {@link #getNext(CAS)} call
     * @throws IOException if reading the data file fails
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        if (this.line == null) {
            this.line = this.input.readLine();
        }
        return this.line != null;
    }
}
