package edu.mayo.bmi.uima.core.ae;

import edu.mayo.bmi.nlp.tokenizer.Token;
import edu.mayo.bmi.nlp.tokenizer.Tokenizer;
import edu.mayo.bmi.uima.core.resource.StringIntegerMapResource;
import edu.mayo.bmi.uima.core.type.BaseToken;
import edu.mayo.bmi.uima.core.type.Segment;
import edu.mayo.bmi.uima.core.util.ParamUtil;
import java.util.Iterator;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:edu/mayo/bmi/uima/core/ae/TokenizerAnnotator.class */
/**
 * UIMA text annotator that tokenizes the text covered by each {@link Segment}
 * in the CAS and adds one {@link BaseToken} annotation per token.
 *
 * <p>Segments whose id appears in the {@code SegmentsToSkip} configuration
 * parameter are left untouched. When a {@code HyphFreqTable} resource is
 * available, the tokenizer is constructed with that table and the
 * {@code FreqCutoff} parameter; otherwise a default tokenizer is used.
 */
public class TokenizerAnnotator extends JTextAnnotator_ImplBase {
    // Capitalization codes. Not referenced by this class; kept for external users.
    public static final int TOKEN_CAP_NONE = 0;
    public static final int TOKEN_CAP_FIRST_ONLY = 1;
    public static final int TOKEN_CAP_MIXED = 2;
    public static final int TOKEN_CAP_ALL = 3;

    // Numeric-position codes. Not referenced by this class; kept for external users.
    public static final int TOKEN_NUM_POS_NONE = 0;
    public static final int TOKEN_NUM_POS_FIRST = 1;
    public static final int TOKEN_NUM_POS_MIDDLE = 2;
    public static final int TOKEN_NUM_POS_LAST = 3;

    // Numeric-type codes. Not referenced by this class; kept for external users.
    public static final int TOKEN_NUM_TYPE_UNKNOWN = 0;
    public static final int TOKEN_NUM_TYPE_INTEGER = 1;
    public static final int TOKEN_NUM_TYPE_DECIMAL = 2;

    /** Name of the configuration parameter listing the ids of segments to skip. */
    public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";

    /** Name of the configuration parameter passed to the tokenizer together with the hyphen table. */
    private static final String PARAM_FREQ_CUTOFF = "FreqCutoff";

    /** Key of the external resource that holds the hyphenation frequency table. */
    private static final String HYPH_FREQ_TABLE_RESRC_KEY = "HyphFreqTable";

    private final Logger logger = Logger.getLogger(getClass().getName());

    private AnnotatorContext context;
    // Left as a raw type: the generic signature of ParamUtil is not visible from this file.
    private Set skipSegmentsSet;
    private Tokenizer tokenizer;

    /** Running token index for the document being processed; reset at the start of each process call. */
    private int tokenCount = 0;

    /**
     * Stores the annotator context and loads the configuration.
     *
     * @param annotatorContext context supplying configuration parameters and resources
     * @throws AnnotatorConfigurationException if the configuration cannot be read or is invalid
     * @throws AnnotatorInitializationException declared by the overridden method
     */
    @Override
    public void initialize(AnnotatorContext annotatorContext) throws AnnotatorConfigurationException, AnnotatorInitializationException {
        super.initialize(annotatorContext);
        this.context = annotatorContext;
        try {
            configInit();
        } catch (AnnotatorContextException e) {
            throw new AnnotatorConfigurationException(e);
        }
    }

    /**
     * Reads the skip-segment set and the frequency cutoff, then builds the tokenizer,
     * with the hyphen table when the resource is present and without it otherwise.
     *
     * @throws AnnotatorContextException if a parameter is missing or invalid, or if
     *         locating the resource or constructing the tokenizer fails
     */
    private void configInit() throws AnnotatorContextException {
        this.skipSegmentsSet = ParamUtil.getStringParameterValuesSet(PARAM_SEGMENTS_TO_SKIP, this.context);
        int freqCutoff = readFreqCutoff();
        try {
            StringIntegerMapResource hyphenTable =
                    (StringIntegerMapResource) this.context.getResourceObject(HYPH_FREQ_TABLE_RESRC_KEY);
            if (hyphenTable == null) {
                this.logger.warn("Unable to locate resource with key=" + HYPH_FREQ_TABLE_RESRC_KEY
                        + ".  Proceeding without hyphenation support.");
                this.tokenizer = new Tokenizer();
            } else {
                this.logger.info("Hyphen dictionary: " + hyphenTable.toString());
                this.tokenizer = new Tokenizer(hyphenTable.getMap(), freqCutoff);
            }
        } catch (Exception e) {
            // Any failure while locating the resource or building the tokenizer is
            // reported through the checked exception that initialize() translates.
            throw new AnnotatorContextException(e);
        }
    }

    /**
     * Reads the {@code FreqCutoff} configuration parameter.
     *
     * <p>The value is validated before unboxing so that a missing or mistyped
     * parameter surfaces as a checked configuration error rather than a bare
     * {@link NullPointerException} or {@link ClassCastException}.
     *
     * @return the configured cutoff
     * @throws AnnotatorContextException if the parameter is absent or is not an {@link Integer}
     */
    private int readFreqCutoff() throws AnnotatorContextException {
        Object value = this.context.getConfigParameterValue(PARAM_FREQ_CUTOFF);
        if (!(value instanceof Integer)) {
            throw new AnnotatorContextException(new IllegalArgumentException(
                    "Configuration parameter '" + PARAM_FREQ_CUTOFF
                            + "' must be set to an Integer value but was: " + value));
        }
        return ((Integer) value).intValue();
    }

    /**
     * Tokenizes every {@link Segment} in the CAS whose id is not in the skip set.
     * Token numbering restarts at zero on each call.
     *
     * @param jCas the CAS to read segments from and add tokens to
     * @param resultSpecification not used by this implementation
     * @throws AnnotatorProcessException if tokenizing any segment fails
     */
    @Override
    public void process(JCas jCas, ResultSpecification resultSpecification) throws AnnotatorProcessException {
        this.logger.info(" process(JCas, ResultSpecification)");
        this.tokenCount = 0;
        FSIterator segments = jCas.getJFSIndexRepository().getAnnotationIndex(Segment.type).iterator();
        while (segments.hasNext()) {
            Segment segment = (Segment) segments.next();
            if (this.skipSegmentsSet.contains(segment.getId())) {
                continue;
            }
            annotateRange(jCas, segment.getBegin(), segment.getEnd());
        }
    }

    /**
     * Tokenizes the document text in the range {@code [rangeBegin, rangeEnd)} and adds a
     * {@link BaseToken} to the CAS indexes for every token produced, numbering the
     * tokens consecutively from the current token counter.
     *
     * @param jCas the CAS holding the document text and receiving the tokens
     * @param rangeBegin start offset of the range (inclusive); also handed to
     *        {@code TokenConverter.convert}, presumably to shift token offsets into
     *        document coordinates (confirm against TokenConverter)
     * @param rangeEnd end offset of the range (exclusive)
     * @throws AnnotatorProcessException wrapping any exception raised while tokenizing or converting
     */
    protected void annotateRange(JCas jCas, int rangeBegin, int rangeEnd) throws AnnotatorProcessException {
        try {
            String rangeText = jCas.getDocumentText().substring(rangeBegin, rangeEnd);
            Iterator<Token> tokens = this.tokenizer.tokenizeAndSort(rangeText).iterator();
            while (tokens.hasNext()) {
                BaseToken baseToken = TokenConverter.convert(tokens.next(), jCas, rangeBegin);
                baseToken.setTokenNumber(this.tokenCount);
                baseToken.addToIndexes();
                this.tokenCount++;
            }
        } catch (Exception e) {
            throw new AnnotatorProcessException(e);
        }
    }
}
