/*
 * Decompiled with CFR 0.152.
 */
package edu.mayo.bmi.uima.lvg.ae;

import edu.mayo.bmi.uima.core.type.Lemma;
import edu.mayo.bmi.uima.core.type.Segment;
import edu.mayo.bmi.uima.core.type.WordToken;
import edu.mayo.bmi.uima.core.util.ListFactory;
import edu.mayo.bmi.uima.lvg.resource.LvgCmdApiResource;
import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import gov.nih.nlm.nls.lvg.Api.LvgLexItemApi;
import gov.nih.nlm.nls.lvg.Lib.Category;
import gov.nih.nlm.nls.lvg.Lib.LexItem;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSList;
import org.apache.uima.jcas.cas.TOP;

/**
 * UIMA text annotator that runs each {@link WordToken} through the NLM LVG
 * (Lexical Variant Generation) API.
 *
 * <p>For every word token inside the processed range the annotator stores a
 * canonical form on the token and, when the {@code PostLemmas} parameter is
 * enabled, attaches a list of {@link Lemma} entries (lemma string plus a
 * part-of-speech tag taken from the configured {@code XeroxTreebankMap}).
 * Optional classpath cache files can short-circuit the LVG calls for words
 * whose frequency exceeds a configured cutoff.
 */
public class LvgAnnotator extends JTextAnnotator_ImplBase {
    /** Boolean parameter: when true, lemma entries are posted on each word token. */
    public static final String PARAM_POST_LEMMAS = "PostLemmas";
    /** Boolean parameter: when true (and lemmas are posted), the lemma cache file is loaded and used. */
    public static final String PARAM_USE_LEMMA_CACHE = "UseLemmaCache";
    /** String parameter: classpath location of the lemma cache file. */
    public static final String PARAM_LEMMA_CACHE_FILE_LOCATION = "LemmaCacheFileLocation";
    /** Integer parameter: lemma cache lines must have a frequency strictly greater than this value. */
    public static final String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF = "LemmaCacheFrequencyCutoff";

    /** Cutoff applied to the lemma cache when {@code LemmaCacheFrequencyCutoff} is not configured. */
    private static final int DEFAULT_LEMMA_CACHE_FREQ_CUTOFF = 20;

    private final Logger logger = Logger.getLogger(this.getClass().getName());
    /** Key under which the LVG resource is registered in the annotator context. */
    private final String LVGCMDAPI_RESRC_KEY = "LvgCmdApi";

    private LvgCmdApi lvgCmd;
    private LvgLexItemApi lvgLexItem;
    private AnnotatorContext context;

    private boolean useSegments;
    /** IDs of segments that must not be processed when {@link #useSegments} is set. */
    private Set<String> skipSegmentsSet;

    private boolean useCmdCache;
    private String cmdCacheFileLocation;
    private int cmdCacheFreqCutoff;

    /** Maps an LVG category name to the tag stored on the produced lemmas. */
    private Map<String, String> xeroxTreebankMap;

    private boolean postLemmas;
    private boolean useLemmaCache;
    private String lemmaCacheFileLocation;
    private int lemmaCacheFreqCutoff;

    /** Word to canonical form, loaded from the Cmd cache file. */
    private Map<String, String> normCacheMap;
    /** Word to cached lemma entries, loaded from the lemma cache file. */
    private Map<String, Set<LemmaLocalClass>> lemmaCacheMap;
    /** Words that are skipped entirely (no canonical form, no lemmas). */
    private Set<String> exclusionSet;

    /**
     * Reads the configuration, resolves the LVG resource and loads whichever
     * cache files the configuration enables.
     *
     * <p>Note that every failure raised while resolving the resource or loading
     * a cache (including the "resource not found" case) is reported as an
     * {@link AnnotatorConfigurationException} wrapping the original exception.
     *
     * @param aContext annotator context supplying parameters and resources
     * @throws AnnotatorConfigurationException if a parameter cannot be read, the
     *         resource cannot be located, or a cache file cannot be loaded
     * @throws AnnotatorInitializationException declared by the overridden method
     */
    @Override
    public void initialize(AnnotatorContext aContext) throws AnnotatorConfigurationException, AnnotatorInitializationException {
        super.initialize(aContext);
        this.context = aContext;
        try {
            this.configInit();
        }
        catch (AnnotatorContextException ace) {
            throw new AnnotatorConfigurationException(ace);
        }
        try {
            LvgCmdApiResource lvgResource = (LvgCmdApiResource)this.context.getResourceObject(this.LVGCMDAPI_RESRC_KEY);
            if (lvgResource == null) {
                throw new AnnotatorInitializationException(new Exception("Unable to locate resource with key=" + this.LVGCMDAPI_RESRC_KEY + "."));
            }
            this.lvgCmd = lvgResource.getLvg();
            if (this.useCmdCache) {
                this.logger.info("Loading Cmd cache=" + this.cmdCacheFileLocation);
                this.loadCmdCacheFile(this.cmdCacheFileLocation);
                this.logger.info("Loaded " + this.normCacheMap.size() + " entries");
            }
            if (this.postLemmas) {
                this.lvgLexItem = lvgResource.getLvgLex();
                if (this.useLemmaCache) {
                    this.logger.info("Loading Lemma cache=" + this.lemmaCacheFileLocation);
                    this.loadLemmaCacheFile(this.lemmaCacheFileLocation);
                    this.logger.info("Loaded " + this.lemmaCacheMap.size() + " entries");
                }
            }
        }
        catch (Exception e) {
            throw new AnnotatorConfigurationException(e);
        }
    }

    /**
     * Copies the configuration parameters from the annotator context into fields.
     *
     * <p>{@code XeroxTreebankMap} entries are split on {@code '|'}; entries that
     * do not yield exactly two tokens are ignored. The lemma-cache parameters
     * are only read when {@code PostLemmas} is true, and the cache location and
     * cutoff only when {@code UseLemmaCache} is true as well.
     *
     * @throws AnnotatorContextException if a parameter cannot be read, or the
     *         lemma cache is enabled without a file location
     */
    private void configInit() throws AnnotatorContextException {
        this.useSegments = (Boolean)this.context.getConfigParameterValue("UseSegments");

        String[] skipSegmentIDs = (String[])this.context.getConfigParameterValue("SegmentsToSkip");
        this.skipSegmentsSet = new HashSet<String>();
        for (String segmentId : skipSegmentIDs) {
            this.skipSegmentsSet.add(segmentId);
        }

        String[] xtMaps = (String[])this.context.getConfigParameterValue("XeroxTreebankMap");
        this.xeroxTreebankMap = new HashMap<String, String>();
        for (String mapping : xtMaps) {
            StringTokenizer tokenizer = new StringTokenizer(mapping, "|");
            if (tokenizer.countTokens() == 2) {
                String xTag = tokenizer.nextToken();
                String tTag = tokenizer.nextToken();
                this.xeroxTreebankMap.put(xTag, tTag);
            }
        }

        this.useCmdCache = (Boolean)this.context.getConfigParameterValue("UseCmdCache");
        this.cmdCacheFileLocation = (String)this.context.getConfigParameterValue("CmdCacheFileLocation");
        this.cmdCacheFreqCutoff = (Integer)this.context.getConfigParameterValue("CmdCacheFrequencyCutoff");

        String[] wordsToExclude = (String[])this.context.getConfigParameterValue("ExclusionSet");
        this.exclusionSet = new HashSet<String>();
        for (String excluded : wordsToExclude) {
            this.exclusionSet.add(excluded);
        }

        Boolean postLemmasParam = (Boolean)this.context.getConfigParameterValue(PARAM_POST_LEMMAS);
        this.postLemmas = postLemmasParam != null && postLemmasParam.booleanValue();

        // Assign the FIELDS here. Previously same-named locals shadowed them, so
        // the lemma cache was never enabled and its cutoff never applied.
        this.useLemmaCache = false;
        if (this.postLemmas) {
            Boolean useLemmaCacheParam = (Boolean)this.context.getConfigParameterValue(PARAM_USE_LEMMA_CACHE);
            this.useLemmaCache = useLemmaCacheParam != null && useLemmaCacheParam.booleanValue();
            if (this.useLemmaCache) {
                this.lemmaCacheFileLocation = (String)this.context.getConfigParameterValue(PARAM_LEMMA_CACHE_FILE_LOCATION);
                if (this.lemmaCacheFileLocation == null) {
                    throw new AnnotatorContextException(new Exception("Parameter for LemmaCacheFileLocation was not set."));
                }
                Integer cutoffParam = (Integer)this.context.getConfigParameterValue(PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF);
                this.lemmaCacheFreqCutoff = cutoffParam == null ? DEFAULT_LEMMA_CACHE_FREQ_CUTOFF : cutoffParam.intValue();
            }
        }
    }

    /**
     * Annotates the word tokens of the document, either segment by segment
     * (skipping segments whose ID is in the skip set) or over the whole text.
     *
     * @param jcas CAS holding the document text and annotations
     * @param resultSpec result specification, passed through unchanged
     * @throws AnnotatorProcessException wrapping any exception raised while processing
     */
    @Override
    public void process(JCas jcas, ResultSpecification resultSpec) throws AnnotatorProcessException {
        this.logger.info(" process(JCas, ResultSpecification)");
        String text = jcas.getDocumentText();
        try {
            if (!this.useSegments) {
                this.annotateRange(jcas, text, 0, text.length(), resultSpec);
                return;
            }
            JFSIndexRepository indexes = jcas.getJFSIndexRepository();
            FSIterator segmentItr = indexes.getAnnotationIndex(Segment.type).iterator();
            while (segmentItr.hasNext()) {
                Segment segmentAnnotation = (Segment)segmentItr.next();
                if (this.skipSegmentsSet.contains(segmentAnnotation.getId())) {
                    continue;
                }
                this.annotateRange(jcas, text, segmentAnnotation.getBegin(), segmentAnnotation.getEnd(), resultSpec);
            }
        }
        catch (Exception e) {
            throw new AnnotatorProcessException(e);
        }
    }

    /**
     * Processes every word token that lies completely inside
     * {@code [rangeBegin, rangeEnd]}.
     *
     * <p>The word is the covered text, unless the token carries a non-empty
     * suggestion, which then takes precedence. Words in the exclusion set are
     * skipped.
     *
     * @param jcas CAS holding the word tokens
     * @param text document text the token offsets refer to
     * @param rangeBegin first offset of the range
     * @param rangeEnd end offset of the range
     * @param resultSpec result specification (not used by this method)
     * @throws AnnotatorContextException if an LVG call fails
     */
    protected void annotateRange(JCas jcas, String text, int rangeBegin, int rangeEnd, ResultSpecification resultSpec) throws AnnotatorContextException {
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        FSIterator wordItr = indexes.getAnnotationIndex(WordToken.type).iterator();
        while (wordItr.hasNext()) {
            WordToken wordAnnotation = (WordToken)wordItr.next();
            if (wordAnnotation.getBegin() < rangeBegin || wordAnnotation.getEnd() > rangeEnd) {
                continue;
            }
            String word = text.substring(wordAnnotation.getBegin(), wordAnnotation.getEnd());
            String suggestion = wordAnnotation.getSuggestion();
            if (suggestion != null && suggestion.length() > 0) {
                word = suggestion;
            }
            if (this.exclusionSet.contains(word)) {
                continue;
            }
            this.setCanonicalForm(wordAnnotation, word);
            if (this.postLemmas) {
                this.setLemma(wordAnnotation, word, jcas);
            }
        }
    }

    /**
     * Stores the canonical form of {@code word} on the token, using the Cmd
     * cache first (when enabled) and falling back to the LVG command API.
     * The token is left untouched when no canonical form is found.
     *
     * @param wordAnnotation token to update
     * @param word word to normalize
     * @throws AnnotatorContextException wrapping any failure of the LVG call
     */
    private void setCanonicalForm(WordToken wordAnnotation, String word) throws AnnotatorContextException {
        String canonicalForm = null;
        if (this.useCmdCache) {
            canonicalForm = this.normCacheMap.get(word);
        }
        if (canonicalForm == null) {
            try {
                // The second '|'-separated field of the LVG output is taken as the
                // canonical form, unless it is the literal "No Output".
                String[] output = this.lvgCmd.MutateToString(word).split("\\|");
                if (output.length >= 2 && !"No Output".equals(output[1])) {
                    canonicalForm = output[1];
                }
            }
            catch (Exception e) {
                throw new AnnotatorContextException(e);
            }
        }
        if (canonicalForm != null) {
            wordAnnotation.setCanonicalForm(canonicalForm);
        }
    }

    /**
     * Builds the lemma list for {@code word} and stores it on the token.
     *
     * <p>Lemmas come from the lemma cache when it is enabled and contains the
     * word; otherwise they are computed through the LVG lex-item API. One
     * {@link Lemma} is created per (lemma string, tag) pair.
     *
     * @param wordAnnotation token that receives the lemma entries
     * @param word word to look up
     * @param jcas CAS in which the lemma feature structures are created
     * @throws AnnotatorContextException wrapping any failure of the LVG call
     */
    private void setLemma(WordToken wordAnnotation, String word, JCas jcas) throws AnnotatorContextException {
        Map<String, Set<String>> lemmaMap = null;
        if (this.useLemmaCache) {
            Set<LemmaLocalClass> cached = this.lemmaCacheMap.get(word);
            if (cached != null) {
                lemmaMap = new HashMap<String, Set<String>>();
                for (LemmaLocalClass entry : cached) {
                    // Merge into a fresh set: several cache entries may share a lemma
                    // string, and the cached sets themselves must stay unmodified.
                    Set<String> tags = lemmaMap.get(entry.word);
                    if (tags == null) {
                        tags = new HashSet<String>();
                        lemmaMap.put(entry.word, tags);
                    }
                    tags.addAll(entry.posSet);
                }
            }
        }
        if (lemmaMap == null) {
            lemmaMap = new HashMap<String, Set<String>>();
            try {
                Vector<LexItem> lexItems = this.lvgLexItem.MutateLexItem(word);
                for (LexItem li : lexItems) {
                    String lemmaStr = li.GetTargetTerm();
                    long[] bitValues = Category.ToValuesArray(li.GetTargetCategory().GetValue());
                    for (long bitValue : bitValues) {
                        String treebankTag = this.xeroxTreebankMap.get(Category.ToName(bitValue));
                        if (treebankTag == null) {
                            // Category without a configured mapping: no lemma is posted for it.
                            continue;
                        }
                        Set<String> posSet = lemmaMap.get(lemmaStr);
                        if (posSet == null) {
                            posSet = new HashSet<String>();
                            lemmaMap.put(lemmaStr, posSet);
                        }
                        posSet.add(treebankTag);
                    }
                }
            }
            catch (Exception e) {
                throw new AnnotatorContextException(e);
            }
        }
        ArrayList<Lemma> lemmas = new ArrayList<Lemma>(lemmaMap.size());
        for (Map.Entry<String, Set<String>> lemmaEntry : lemmaMap.entrySet()) {
            for (String pos : lemmaEntry.getValue()) {
                Lemma lemma = new Lemma(jcas);
                lemma.setKey(lemmaEntry.getKey());
                lemma.setPosTag(pos);
                lemmas.add(lemma);
            }
        }
        TOP[] lemmaArray = lemmas.toArray(new Lemma[lemmas.size()]);
        FSList fsList = ListFactory.buildList(jcas, lemmaArray);
        wordAnnotation.setLemmaEntries(fsList);
    }

    /**
     * Opens a classpath resource, resolved relative to this class, as a buffered reader.
     *
     * @param cpLocation classpath location of the resource
     * @return reader over the resource; the caller must close it
     * @throws FileNotFoundException if the resource does not exist
     */
    private BufferedReader openCacheReader(String cpLocation) throws FileNotFoundException {
        InputStream inStream = this.getClass().getResourceAsStream(cpLocation);
        if (inStream == null) {
            throw new FileNotFoundException("Unable to find: " + cpLocation);
        }
        return new BufferedReader(new InputStreamReader(inStream));
    }

    /**
     * Loads the Cmd (normalization) cache into {@link #normCacheMap}.
     *
     * <p>Each valid line has exactly seven non-empty {@code '|'}-separated
     * tokens; the first three are frequency, original word and normalized word.
     * Lines whose frequency is not strictly greater than the configured cutoff
     * are discarded, and the first mapping seen for a word wins.
     *
     * @param cpLocation classpath location of the cache file
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException if reading or closing the resource fails
     */
    private void loadCmdCacheFile(String cpLocation) throws FileNotFoundException, IOException {
        BufferedReader br = this.openCacheReader(cpLocation);
        try {
            this.normCacheMap = new HashMap<String, String>();
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                StringTokenizer st = new StringTokenizer(line, "|");
                if (st.countTokens() != 7) {
                    this.logger.warn("Invalid LVG norm cache line: " + line);
                    continue;
                }
                int freq = Integer.parseInt(st.nextToken());
                if (freq <= this.cmdCacheFreqCutoff) {
                    this.logger.debug("Discarding norm cache line due to frequency cutoff: " + line);
                    continue;
                }
                String origWord = st.nextToken();
                String normWord = st.nextToken();
                if (!this.normCacheMap.containsKey(origWord)) {
                    this.normCacheMap.put(origWord, normWord);
                }
            }
        }
        finally {
            // Release the classpath stream; it was previously leaked.
            br.close();
        }
    }

    /**
     * Loads the lemma cache into {@link #lemmaCacheMap}.
     *
     * <p>Each valid line has exactly four non-empty {@code '|'}-separated
     * tokens: frequency, original word, lemma and a combined category string.
     * The first and last character of the category string are stripped before
     * it is handed to {@code Category.ToValue}. Categories without an entry in
     * the tag map are dropped. Lines whose frequency is not strictly greater
     * than the lemma cutoff are discarded.
     *
     * @param cpLocation classpath location of the cache file
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException if reading or closing the resource fails
     */
    private void loadLemmaCacheFile(String cpLocation) throws FileNotFoundException, IOException {
        BufferedReader br = this.openCacheReader(cpLocation);
        try {
            this.lemmaCacheMap = new HashMap<String, Set<LemmaLocalClass>>();
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                StringTokenizer st = new StringTokenizer(line, "|");
                if (st.countTokens() != 4) {
                    this.logger.warn("Invalid LVG lemma cache line: " + line);
                    continue;
                }
                int freq = Integer.parseInt(st.nextToken());
                if (freq <= this.lemmaCacheFreqCutoff) {
                    this.logger.debug("Discarding lemma cache line due to frequency cutoff: " + line);
                    continue;
                }
                String origWord = st.nextToken();
                String lemmaWord = st.nextToken();
                String combinedCategories = st.nextToken();
                // Drop the enclosing delimiter characters around the category list.
                combinedCategories = combinedCategories.substring(1, combinedCategories.length() - 1);

                LemmaLocalClass cachedLemma = new LemmaLocalClass();
                cachedLemma.word = lemmaWord;
                cachedLemma.posSet = new HashSet<String>();
                long[] bitValues = Category.ToValuesArray(Category.ToValue(combinedCategories));
                for (long bitValue : bitValues) {
                    String treebankTag = this.xeroxTreebankMap.get(Category.ToName(bitValue));
                    if (treebankTag != null) {
                        cachedLemma.posSet.add(treebankTag);
                    }
                }

                Set<LemmaLocalClass> lemmaSet = this.lemmaCacheMap.get(origWord);
                if (lemmaSet == null) {
                    lemmaSet = new HashSet<LemmaLocalClass>();
                    this.lemmaCacheMap.put(origWord, lemmaSet);
                }
                lemmaSet.add(cachedLemma);
            }
        }
        finally {
            // Release the classpath stream; it was previously leaked.
            br.close();
        }
    }

    /**
     * One cached lemma of a word: the lemma string and the tags it may carry.
     * Instances use identity equality, so every cache line yields its own entry.
     */
    class LemmaLocalClass {
        /** Lemma string. */
        public String word;
        /** Tags (values of the configured tag map) associated with the lemma. */
        public Set<String> posSet;

        LemmaLocalClass() {
        }
    }
}

