/*
 * Decompiled with CFR 0.152.
 */
package edu.mayo.bmi.uima.core.ae;

import edu.mayo.bmi.nlp.tokenizer.Token;
import edu.mayo.bmi.nlp.tokenizer.Tokenizer;
import edu.mayo.bmi.uima.core.ae.TokenConverter;
import edu.mayo.bmi.uima.core.resource.StringIntegerMapResource;
import edu.mayo.bmi.uima.core.type.BaseToken;
import edu.mayo.bmi.uima.core.type.Segment;
import edu.mayo.bmi.uima.core.util.ParamUtil;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;

public class TokenizerAnnotator
extends JTextAnnotator_ImplBase {
    private Logger logger = Logger.getLogger(this.getClass().getName());
    public static final int TOKEN_CAP_NONE = 0;
    public static final int TOKEN_CAP_FIRST_ONLY = 1;
    public static final int TOKEN_CAP_MIXED = 2;
    public static final int TOKEN_CAP_ALL = 3;
    public static final int TOKEN_NUM_POS_NONE = 0;
    public static final int TOKEN_NUM_POS_FIRST = 1;
    public static final int TOKEN_NUM_POS_MIDDLE = 2;
    public static final int TOKEN_NUM_POS_LAST = 3;
    public static final int TOKEN_NUM_TYPE_UNKNOWN = 0;
    public static final int TOKEN_NUM_TYPE_INTEGER = 1;
    public static final int TOKEN_NUM_TYPE_DECIMAL = 2;
    public static final String PARAM_SEGMENTS_TO_SKIP = "SegmentsToSkip";
    private final String HYPH_FREQ_TABLE_RESRC_KEY = "HyphFreqTable";
    private AnnotatorContext context;
    private Set skipSegmentsSet;
    private Tokenizer tokenizer;
    private int tokenCount = 0;

    @Override
    public void initialize(AnnotatorContext aContext) throws AnnotatorConfigurationException, AnnotatorInitializationException {
        super.initialize(aContext);
        this.context = aContext;
        try {
            this.configInit();
        }
        catch (AnnotatorContextException ace) {
            throw new AnnotatorConfigurationException(ace);
        }
    }

    private void configInit() throws AnnotatorContextException {
        this.skipSegmentsSet = ParamUtil.getStringParameterValuesSet(PARAM_SEGMENTS_TO_SKIP, this.context);
        int freqCutoff = (Integer)this.context.getConfigParameterValue("FreqCutoff");
        try {
            StringIntegerMapResource strIntMapResrc = (StringIntegerMapResource)this.context.getResourceObject("HyphFreqTable");
            if (strIntMapResrc == null) {
                this.logger.warn("Unable to locate resource with key=HyphFreqTable.  Proceeding without hyphenation support.");
                this.tokenizer = new Tokenizer();
            } else {
                this.logger.info("Hyphen dictionary: " + strIntMapResrc.toString());
                Map hyphMap = strIntMapResrc.getMap();
                this.tokenizer = new Tokenizer(hyphMap, freqCutoff);
            }
        }
        catch (Exception e) {
            throw new AnnotatorContextException(e);
        }
    }

    @Override
    public void process(JCas jcas, ResultSpecification resultSpec) throws AnnotatorProcessException {
        this.logger.info(" process(JCas, ResultSpecification)");
        this.tokenCount = 0;
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        FSIterator segmentItr = indexes.getAnnotationIndex(Segment.type).iterator();
        while (segmentItr.hasNext()) {
            Segment sa = (Segment)segmentItr.next();
            String segmentID = sa.getId();
            if (this.skipSegmentsSet.contains(segmentID)) continue;
            this.annotateRange(jcas, sa.getBegin(), sa.getEnd());
        }
    }

    protected void annotateRange(JCas jcas, int beginPos, int endPos) throws AnnotatorProcessException {
        String text = jcas.getDocumentText().substring(beginPos, endPos);
        List<Token> tokens = null;
        try {
            tokens = this.tokenizer.tokenizeAndSort(text);
        }
        catch (Exception e) {
            throw new AnnotatorProcessException(e);
        }
        for (Token token : tokens) {
            BaseToken bta = TokenConverter.convert(token, jcas, beginPos);
            bta.setTokenNumber(this.tokenCount);
            bta.addToIndexes();
            ++this.tokenCount;
        }
    }
}

